SirajRLX committed on
Commit
a555835
·
verified ·
1 Parent(s): e483cf3

Add Devstral-24B CPT training run

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +5 -0
  2. cpt_devstral_24B/best_adapter/README.md +207 -0
  3. cpt_devstral_24B/best_adapter/adapter_config.json +43 -0
  4. cpt_devstral_24B/best_adapter/adapter_model.safetensors +3 -0
  5. cpt_devstral_24B/best_adapter/chat_template.jinja +121 -0
  6. cpt_devstral_24B/best_adapter/tokenizer.json +3 -0
  7. cpt_devstral_24B/best_adapter/tokenizer_config.json +1013 -0
  8. cpt_devstral_24B/best_adapter/training_args.bin +3 -0
  9. cpt_devstral_24B/checkpoints/checkpoint-400/README.md +207 -0
  10. cpt_devstral_24B/checkpoints/checkpoint-400/adapter_config.json +43 -0
  11. cpt_devstral_24B/checkpoints/checkpoint-400/adapter_model.safetensors +3 -0
  12. cpt_devstral_24B/checkpoints/checkpoint-400/chat_template.jinja +121 -0
  13. cpt_devstral_24B/checkpoints/checkpoint-400/optimizer.pt +3 -0
  14. cpt_devstral_24B/checkpoints/checkpoint-400/rng_state.pth +3 -0
  15. cpt_devstral_24B/checkpoints/checkpoint-400/scheduler.pt +3 -0
  16. cpt_devstral_24B/checkpoints/checkpoint-400/tokenizer.json +3 -0
  17. cpt_devstral_24B/checkpoints/checkpoint-400/tokenizer_config.json +1013 -0
  18. cpt_devstral_24B/checkpoints/checkpoint-400/trainer_state.json +2898 -0
  19. cpt_devstral_24B/checkpoints/checkpoint-400/training_args.bin +3 -0
  20. cpt_devstral_24B/checkpoints/checkpoint-500/README.md +207 -0
  21. cpt_devstral_24B/checkpoints/checkpoint-500/adapter_config.json +43 -0
  22. cpt_devstral_24B/checkpoints/checkpoint-500/adapter_model.safetensors +3 -0
  23. cpt_devstral_24B/checkpoints/checkpoint-500/chat_template.jinja +121 -0
  24. cpt_devstral_24B/checkpoints/checkpoint-500/optimizer.pt +3 -0
  25. cpt_devstral_24B/checkpoints/checkpoint-500/rng_state.pth +3 -0
  26. cpt_devstral_24B/checkpoints/checkpoint-500/scheduler.pt +3 -0
  27. cpt_devstral_24B/checkpoints/checkpoint-500/tokenizer.json +3 -0
  28. cpt_devstral_24B/checkpoints/checkpoint-500/tokenizer_config.json +1013 -0
  29. cpt_devstral_24B/checkpoints/checkpoint-500/trainer_state.json +3614 -0
  30. cpt_devstral_24B/checkpoints/checkpoint-500/training_args.bin +3 -0
  31. cpt_devstral_24B/checkpoints/checkpoint-600/README.md +207 -0
  32. cpt_devstral_24B/checkpoints/checkpoint-600/adapter_config.json +43 -0
  33. cpt_devstral_24B/checkpoints/checkpoint-600/adapter_model.safetensors +3 -0
  34. cpt_devstral_24B/checkpoints/checkpoint-600/chat_template.jinja +121 -0
  35. cpt_devstral_24B/checkpoints/checkpoint-600/optimizer.pt +3 -0
  36. cpt_devstral_24B/checkpoints/checkpoint-600/rng_state.pth +3 -0
  37. cpt_devstral_24B/checkpoints/checkpoint-600/scheduler.pt +3 -0
  38. cpt_devstral_24B/checkpoints/checkpoint-600/tokenizer.json +3 -0
  39. cpt_devstral_24B/checkpoints/checkpoint-600/tokenizer_config.json +1013 -0
  40. cpt_devstral_24B/checkpoints/checkpoint-600/trainer_state.json +0 -0
  41. cpt_devstral_24B/checkpoints/checkpoint-600/training_args.bin +3 -0
  42. cpt_devstral_24B/checkpoints/checkpoint-686/README.md +207 -0
  43. cpt_devstral_24B/checkpoints/checkpoint-686/adapter_config.json +43 -0
  44. cpt_devstral_24B/checkpoints/checkpoint-686/adapter_model.safetensors +3 -0
  45. cpt_devstral_24B/checkpoints/checkpoint-686/chat_template.jinja +121 -0
  46. cpt_devstral_24B/checkpoints/checkpoint-686/optimizer.pt +3 -0
  47. cpt_devstral_24B/checkpoints/checkpoint-686/rng_state.pth +3 -0
  48. cpt_devstral_24B/checkpoints/checkpoint-686/scheduler.pt +3 -0
  49. cpt_devstral_24B/checkpoints/checkpoint-686/tokenizer.json +3 -0
  50. cpt_devstral_24B/checkpoints/checkpoint-686/tokenizer_config.json +1013 -0
.gitattributes CHANGED
@@ -43,3 +43,8 @@ cpt_qwen_14B/checkpoints/checkpoint-600/tokenizer.json filter=lfs diff=lfs merge
43
  cpt_qwen_14B/checkpoints/checkpoint-656/tokenizer.json filter=lfs diff=lfs merge=lfs -text
44
  cpt_qwen_14B/wandb/offline-run-20251223_125436-g6vlcw0j/run-g6vlcw0j.wandb filter=lfs diff=lfs merge=lfs -text
45
  sft_qwen_14B/wandb/run-20251223_142702-ldjr67u6/run-ldjr67u6.wandb filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
43
  cpt_qwen_14B/checkpoints/checkpoint-656/tokenizer.json filter=lfs diff=lfs merge=lfs -text
44
  cpt_qwen_14B/wandb/offline-run-20251223_125436-g6vlcw0j/run-g6vlcw0j.wandb filter=lfs diff=lfs merge=lfs -text
45
  sft_qwen_14B/wandb/run-20251223_142702-ldjr67u6/run-ldjr67u6.wandb filter=lfs diff=lfs merge=lfs -text
46
+ cpt_devstral_24B/best_adapter/tokenizer.json filter=lfs diff=lfs merge=lfs -text
47
+ cpt_devstral_24B/checkpoints/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
48
+ cpt_devstral_24B/checkpoints/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
49
+ cpt_devstral_24B/checkpoints/checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
50
+ cpt_devstral_24B/checkpoints/checkpoint-686/tokenizer.json filter=lfs diff=lfs merge=lfs -text
cpt_devstral_24B/best_adapter/README.md ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: /workspace/Models/Devstral-Small-2-24B-Instruct-2512
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:/workspace/Models/Devstral-Small-2-24B-Instruct-2512
7
+ - lora
8
+ - transformers
9
+ ---
10
+
11
+ # Model Card for Model ID
12
+
13
+ <!-- Provide a quick summary of what the model is/does. -->
14
+
15
+
16
+
17
+ ## Model Details
18
+
19
+ ### Model Description
20
+
21
+ <!-- Provide a longer summary of what this model is. -->
22
+
23
+
24
+
25
+ - **Developed by:** [More Information Needed]
26
+ - **Funded by [optional]:** [More Information Needed]
27
+ - **Shared by [optional]:** [More Information Needed]
28
+ - **Model type:** [More Information Needed]
29
+ - **Language(s) (NLP):** [More Information Needed]
30
+ - **License:** [More Information Needed]
31
+ - **Finetuned from model [optional]:** [More Information Needed]
32
+
33
+ ### Model Sources [optional]
34
+
35
+ <!-- Provide the basic links for the model. -->
36
+
37
+ - **Repository:** [More Information Needed]
38
+ - **Paper [optional]:** [More Information Needed]
39
+ - **Demo [optional]:** [More Information Needed]
40
+
41
+ ## Uses
42
+
43
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
44
+
45
+ ### Direct Use
46
+
47
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
48
+
49
+ [More Information Needed]
50
+
51
+ ### Downstream Use [optional]
52
+
53
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
54
+
55
+ [More Information Needed]
56
+
57
+ ### Out-of-Scope Use
58
+
59
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
60
+
61
+ [More Information Needed]
62
+
63
+ ## Bias, Risks, and Limitations
64
+
65
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
66
+
67
+ [More Information Needed]
68
+
69
+ ### Recommendations
70
+
71
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
72
+
73
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
74
+
75
+ ## How to Get Started with the Model
76
+
77
+ Use the code below to get started with the model.
78
+
79
+ [More Information Needed]
80
+
81
+ ## Training Details
82
+
83
+ ### Training Data
84
+
85
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
86
+
87
+ [More Information Needed]
88
+
89
+ ### Training Procedure
90
+
91
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
92
+
93
+ #### Preprocessing [optional]
94
+
95
+ [More Information Needed]
96
+
97
+
98
+ #### Training Hyperparameters
99
+
100
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
101
+
102
+ #### Speeds, Sizes, Times [optional]
103
+
104
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
105
+
106
+ [More Information Needed]
107
+
108
+ ## Evaluation
109
+
110
+ <!-- This section describes the evaluation protocols and provides the results. -->
111
+
112
+ ### Testing Data, Factors & Metrics
113
+
114
+ #### Testing Data
115
+
116
+ <!-- This should link to a Dataset Card if possible. -->
117
+
118
+ [More Information Needed]
119
+
120
+ #### Factors
121
+
122
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
123
+
124
+ [More Information Needed]
125
+
126
+ #### Metrics
127
+
128
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
129
+
130
+ [More Information Needed]
131
+
132
+ ### Results
133
+
134
+ [More Information Needed]
135
+
136
+ #### Summary
137
+
138
+
139
+
140
+ ## Model Examination [optional]
141
+
142
+ <!-- Relevant interpretability work for the model goes here -->
143
+
144
+ [More Information Needed]
145
+
146
+ ## Environmental Impact
147
+
148
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
149
+
150
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
151
+
152
+ - **Hardware Type:** [More Information Needed]
153
+ - **Hours used:** [More Information Needed]
154
+ - **Cloud Provider:** [More Information Needed]
155
+ - **Compute Region:** [More Information Needed]
156
+ - **Carbon Emitted:** [More Information Needed]
157
+
158
+ ## Technical Specifications [optional]
159
+
160
+ ### Model Architecture and Objective
161
+
162
+ [More Information Needed]
163
+
164
+ ### Compute Infrastructure
165
+
166
+ [More Information Needed]
167
+
168
+ #### Hardware
169
+
170
+ [More Information Needed]
171
+
172
+ #### Software
173
+
174
+ [More Information Needed]
175
+
176
+ ## Citation [optional]
177
+
178
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
179
+
180
+ **BibTeX:**
181
+
182
+ [More Information Needed]
183
+
184
+ **APA:**
185
+
186
+ [More Information Needed]
187
+
188
+ ## Glossary [optional]
189
+
190
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
191
+
192
+ [More Information Needed]
193
+
194
+ ## More Information [optional]
195
+
196
+ [More Information Needed]
197
+
198
+ ## Model Card Authors [optional]
199
+
200
+ [More Information Needed]
201
+
202
+ ## Model Card Contact
203
+
204
+ [More Information Needed]
205
+ ### Framework versions
206
+
207
+ - PEFT 0.18.0
cpt_devstral_24B/best_adapter/adapter_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "/workspace/Models/Devstral-Small-2-24B-Instruct-2512",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 128,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.0",
27
+ "qalora_group_size": 16,
28
+ "r": 64,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "k_proj",
33
+ "o_proj",
34
+ "q_proj",
35
+ "v_proj"
36
+ ],
37
+ "target_parameters": null,
38
+ "task_type": "CAUSAL_LM",
39
+ "trainable_token_indices": null,
40
+ "use_dora": false,
41
+ "use_qalora": false,
42
+ "use_rslora": false
43
+ }
cpt_devstral_24B/best_adapter/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6528dd74de4fce9bff6c944acd9bc01868d155b1ea5403fe93fb8c5ced4d4ec
3
+ size 364983848
cpt_devstral_24B/best_adapter/chat_template.jinja ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {#- Default system message if no system prompt is passed. #}
2
+ {%- set default_system_message = '' %}
3
+
4
+ {#- Begin of sequence token. #}
5
+ {{- bos_token }}
6
+
7
+ {#- Handle system prompt if it exists. #}
8
+ {#- System prompt supports text content or text chunks. #}
9
+ {%- if messages[0]['role'] == 'system' %}
10
+ {{- '[SYSTEM_PROMPT]' -}}
11
+ {%- if messages[0]['content'] is string %}
12
+ {{- messages[0]['content'] -}}
13
+ {%- else %}
14
+ {%- for block in messages[0]['content'] %}
15
+ {%- if block['type'] == 'text' %}
16
+ {{- block['text'] }}
17
+ {%- else %}
18
+ {{- raise_exception('Only text chunks are supported in system message contents.') }}
19
+ {%- endif %}
20
+ {%- endfor %}
21
+ {%- endif %}
22
+ {{- '[/SYSTEM_PROMPT]' -}}
23
+ {%- set loop_messages = messages[1:] %}
24
+ {%- else %}
25
+ {%- set loop_messages = messages %}
26
+ {%- if default_system_message != '' %}
27
+ {{- '[SYSTEM_PROMPT]' + default_system_message + '[/SYSTEM_PROMPT]' }}
28
+ {%- endif %}
29
+ {%- endif %}
30
+
31
+
32
+ {#- Tools definition #}
33
+ {%- set tools_definition = '' %}
34
+ {%- set has_tools = false %}
35
+ {%- if tools is defined and tools is not none and tools|length > 0 %}
36
+ {%- set has_tools = true %}
37
+ {%- set tools_definition = '[AVAILABLE_TOOLS]' + (tools| tojson) + '[/AVAILABLE_TOOLS]' %}
38
+ {{- tools_definition }}
39
+ {%- endif %}
40
+
41
+ {#- Checks for alternating user/assistant messages. #}
42
+ {%- set ns = namespace(index=0) %}
43
+ {%- for message in loop_messages %}
44
+ {%- if message.role == 'user' or (message.role == 'assistant' and (message.tool_calls is not defined or message.tool_calls is none or message.tool_calls | length == 0)) %}
45
+ {%- if (message['role'] == 'user') != (ns.index % 2 == 0) %}
46
+ {{- raise_exception('After the optional system message, conversation roles must alternate user and assistant roles except for tool calls and results.') }}
47
+ {%- endif %}
48
+ {%- set ns.index = ns.index + 1 %}
49
+ {%- endif %}
50
+ {%- endfor %}
51
+
52
+ {#- Handle conversation messages. #}
53
+ {%- for message in loop_messages %}
54
+
55
+ {#- User messages support text content or text and image chunks. #}
56
+ {%- if message['role'] == 'user' %}
57
+ {%- if message['content'] is string %}
58
+ {{- '[INST]' + message['content'] + '[/INST]' }}
59
+ {%- elif message['content'] | length > 0 %}
60
+ {{- '[INST]' }}
61
+ {%- if message['content'] | length == 2 %}
62
+ {%- set blocks = message['content'] | sort(attribute='type') %}
63
+ {%- else %}
64
+ {%- set blocks = message['content'] %}
65
+ {%- endif %}
66
+ {%- for block in blocks %}
67
+ {%- if block['type'] == 'text' %}
68
+ {{- block['text'] }}
69
+ {%- elif block['type'] in ['image', 'image_url'] %}
70
+ {{- '[IMG]' }}
71
+ {%- else %}
72
+ {{- raise_exception('Only text, image and image_url chunks are supported in user message content.') }}
73
+ {%- endif %}
74
+ {%- endfor %}
75
+ {{- '[/INST]' }}
76
+ {%- else %}
77
+ {{- raise_exception('User message must have a string or a list of chunks in content') }}
78
+ {%- endif %}
79
+
80
+ {#- Assistant messages support text content or text chunks only. #}
81
+ {%- elif message['role'] == 'assistant' %}
82
+ {%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}
83
+ {{- raise_exception('Assistant message must have a string or a list of chunks in content or a list of tool calls.') }}
84
+ {%- endif %}
85
+
86
+ {%- if message['content'] is string %}
87
+ {{- message['content'] }}
88
+ {%- elif message['content'] | length > 0 %}
89
+ {%- for block in message['content'] %}
90
+ {%- if block['type'] == 'text' %}
91
+ {{- block['text'] }}
92
+ {%- else %}
93
+ {{- raise_exception('Only text chunks are supported in assistant message contents.') }}
94
+ {%- endif %}
95
+ {%- endfor %}
96
+ {%- endif %}
97
+
98
+ {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 %}
99
+ {%- for tool in message['tool_calls'] %}
100
+ {%- set arguments = tool['function']['arguments'] %}
101
+ {%- if arguments is not string %}
102
+ {%- set arguments = arguments|tojson|safe %}
103
+ {%- elif arguments == '' %}
104
+ {%- set arguments = '{}' %}
105
+ {%- endif %}
106
+ {{- '[TOOL_CALLS]' + tool['function']['name'] + '[ARGS]' + arguments }}
107
+ {%- endfor %}
108
+ {%- endif %}
109
+
110
+ {#- End of sequence token for each assistant message. #}
111
+ {{- eos_token }}
112
+
113
+ {#- Tool messages only support text content. #}
114
+ {%- elif message['role'] == 'tool' %}
115
+ {{- '[TOOL_RESULTS]' + message['content']|string + '[/TOOL_RESULTS]' }}
116
+
117
+ {#- Raise exception for unsupported roles. #}
118
+ {%- else %}
119
+ {{- raise_exception('Only user, assistant and tool roles are supported, got ' + message['role'] + '.') }}
120
+ {%- endif %}
121
+ {%- endfor %}
cpt_devstral_24B/best_adapter/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286acad9b0e27fce778ac429763536accf618ccb6ed72963b6f94685e531c5c7
3
+ size 17077402
cpt_devstral_24B/best_adapter/tokenizer_config.json ADDED
@@ -0,0 +1,1013 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "extra_special_tokens": [
6
+ "<unk>",
7
+ "<s>",
8
+ "</s>",
9
+ "[INST]",
10
+ "[/INST]",
11
+ "[AVAILABLE_TOOLS]",
12
+ "[/AVAILABLE_TOOLS]",
13
+ "[TOOL_RESULTS]",
14
+ "[/TOOL_RESULTS]",
15
+ "[TOOL_CALLS]",
16
+ "[IMG]",
17
+ "<pad>",
18
+ "[IMG_BREAK]",
19
+ "[IMG_END]",
20
+ "[PREFIX]",
21
+ "[MIDDLE]",
22
+ "[SUFFIX]",
23
+ "[SYSTEM_PROMPT]",
24
+ "[/SYSTEM_PROMPT]",
25
+ "[TOOL_CONTENT]",
26
+ "<SPECIAL_20>",
27
+ "<SPECIAL_21>",
28
+ "<SPECIAL_22>",
29
+ "<SPECIAL_23>",
30
+ "[AUDIO]",
31
+ "[BEGIN_AUDIO]",
32
+ "<SPECIAL_26>",
33
+ "<SPECIAL_27>",
34
+ "<SPECIAL_28>",
35
+ "<SPECIAL_29>",
36
+ "<SPECIAL_30>",
37
+ "<SPECIAL_31>",
38
+ "[ARGS]",
39
+ "[CALL_ID]",
40
+ "[THINK]",
41
+ "[/THINK]",
42
+ "<SPECIAL_36>",
43
+ "<SPECIAL_37>",
44
+ "<SPECIAL_38>",
45
+ "<SPECIAL_39>",
46
+ "<SPECIAL_40>",
47
+ "<SPECIAL_41>",
48
+ "<SPECIAL_42>",
49
+ "<SPECIAL_43>",
50
+ "<SPECIAL_44>",
51
+ "<SPECIAL_45>",
52
+ "<SPECIAL_46>",
53
+ "<SPECIAL_47>",
54
+ "<SPECIAL_48>",
55
+ "<SPECIAL_49>",
56
+ "<SPECIAL_50>",
57
+ "<SPECIAL_51>",
58
+ "<SPECIAL_52>",
59
+ "<SPECIAL_53>",
60
+ "<SPECIAL_54>",
61
+ "<SPECIAL_55>",
62
+ "<SPECIAL_56>",
63
+ "<SPECIAL_57>",
64
+ "<SPECIAL_58>",
65
+ "<SPECIAL_59>",
66
+ "<SPECIAL_60>",
67
+ "<SPECIAL_61>",
68
+ "<SPECIAL_62>",
69
+ "<SPECIAL_63>",
70
+ "<SPECIAL_64>",
71
+ "<SPECIAL_65>",
72
+ "<SPECIAL_66>",
73
+ "<SPECIAL_67>",
74
+ "<SPECIAL_68>",
75
+ "<SPECIAL_69>",
76
+ "<SPECIAL_70>",
77
+ "<SPECIAL_71>",
78
+ "<SPECIAL_72>",
79
+ "<SPECIAL_73>",
80
+ "<SPECIAL_74>",
81
+ "<SPECIAL_75>",
82
+ "<SPECIAL_76>",
83
+ "<SPECIAL_77>",
84
+ "<SPECIAL_78>",
85
+ "<SPECIAL_79>",
86
+ "<SPECIAL_80>",
87
+ "<SPECIAL_81>",
88
+ "<SPECIAL_82>",
89
+ "<SPECIAL_83>",
90
+ "<SPECIAL_84>",
91
+ "<SPECIAL_85>",
92
+ "<SPECIAL_86>",
93
+ "<SPECIAL_87>",
94
+ "<SPECIAL_88>",
95
+ "<SPECIAL_89>",
96
+ "<SPECIAL_90>",
97
+ "<SPECIAL_91>",
98
+ "<SPECIAL_92>",
99
+ "<SPECIAL_93>",
100
+ "<SPECIAL_94>",
101
+ "<SPECIAL_95>",
102
+ "<SPECIAL_96>",
103
+ "<SPECIAL_97>",
104
+ "<SPECIAL_98>",
105
+ "<SPECIAL_99>",
106
+ "<SPECIAL_100>",
107
+ "<SPECIAL_101>",
108
+ "<SPECIAL_102>",
109
+ "<SPECIAL_103>",
110
+ "<SPECIAL_104>",
111
+ "<SPECIAL_105>",
112
+ "<SPECIAL_106>",
113
+ "<SPECIAL_107>",
114
+ "<SPECIAL_108>",
115
+ "<SPECIAL_109>",
116
+ "<SPECIAL_110>",
117
+ "<SPECIAL_111>",
118
+ "<SPECIAL_112>",
119
+ "<SPECIAL_113>",
120
+ "<SPECIAL_114>",
121
+ "<SPECIAL_115>",
122
+ "<SPECIAL_116>",
123
+ "<SPECIAL_117>",
124
+ "<SPECIAL_118>",
125
+ "<SPECIAL_119>",
126
+ "<SPECIAL_120>",
127
+ "<SPECIAL_121>",
128
+ "<SPECIAL_122>",
129
+ "<SPECIAL_123>",
130
+ "<SPECIAL_124>",
131
+ "<SPECIAL_125>",
132
+ "<SPECIAL_126>",
133
+ "<SPECIAL_127>",
134
+ "<SPECIAL_128>",
135
+ "<SPECIAL_129>",
136
+ "<SPECIAL_130>",
137
+ "<SPECIAL_131>",
138
+ "<SPECIAL_132>",
139
+ "<SPECIAL_133>",
140
+ "<SPECIAL_134>",
141
+ "<SPECIAL_135>",
142
+ "<SPECIAL_136>",
143
+ "<SPECIAL_137>",
144
+ "<SPECIAL_138>",
145
+ "<SPECIAL_139>",
146
+ "<SPECIAL_140>",
147
+ "<SPECIAL_141>",
148
+ "<SPECIAL_142>",
149
+ "<SPECIAL_143>",
150
+ "<SPECIAL_144>",
151
+ "<SPECIAL_145>",
152
+ "<SPECIAL_146>",
153
+ "<SPECIAL_147>",
154
+ "<SPECIAL_148>",
155
+ "<SPECIAL_149>",
156
+ "<SPECIAL_150>",
157
+ "<SPECIAL_151>",
158
+ "<SPECIAL_152>",
159
+ "<SPECIAL_153>",
160
+ "<SPECIAL_154>",
161
+ "<SPECIAL_155>",
162
+ "<SPECIAL_156>",
163
+ "<SPECIAL_157>",
164
+ "<SPECIAL_158>",
165
+ "<SPECIAL_159>",
166
+ "<SPECIAL_160>",
167
+ "<SPECIAL_161>",
168
+ "<SPECIAL_162>",
169
+ "<SPECIAL_163>",
170
+ "<SPECIAL_164>",
171
+ "<SPECIAL_165>",
172
+ "<SPECIAL_166>",
173
+ "<SPECIAL_167>",
174
+ "<SPECIAL_168>",
175
+ "<SPECIAL_169>",
176
+ "<SPECIAL_170>",
177
+ "<SPECIAL_171>",
178
+ "<SPECIAL_172>",
179
+ "<SPECIAL_173>",
180
+ "<SPECIAL_174>",
181
+ "<SPECIAL_175>",
182
+ "<SPECIAL_176>",
183
+ "<SPECIAL_177>",
184
+ "<SPECIAL_178>",
185
+ "<SPECIAL_179>",
186
+ "<SPECIAL_180>",
187
+ "<SPECIAL_181>",
188
+ "<SPECIAL_182>",
189
+ "<SPECIAL_183>",
190
+ "<SPECIAL_184>",
191
+ "<SPECIAL_185>",
192
+ "<SPECIAL_186>",
193
+ "<SPECIAL_187>",
194
+ "<SPECIAL_188>",
195
+ "<SPECIAL_189>",
196
+ "<SPECIAL_190>",
197
+ "<SPECIAL_191>",
198
+ "<SPECIAL_192>",
199
+ "<SPECIAL_193>",
200
+ "<SPECIAL_194>",
201
+ "<SPECIAL_195>",
202
+ "<SPECIAL_196>",
203
+ "<SPECIAL_197>",
204
+ "<SPECIAL_198>",
205
+ "<SPECIAL_199>",
206
+ "<SPECIAL_200>",
207
+ "<SPECIAL_201>",
208
+ "<SPECIAL_202>",
209
+ "<SPECIAL_203>",
210
+ "<SPECIAL_204>",
211
+ "<SPECIAL_205>",
212
+ "<SPECIAL_206>",
213
+ "<SPECIAL_207>",
214
+ "<SPECIAL_208>",
215
+ "<SPECIAL_209>",
216
+ "<SPECIAL_210>",
217
+ "<SPECIAL_211>",
218
+ "<SPECIAL_212>",
219
+ "<SPECIAL_213>",
220
+ "<SPECIAL_214>",
221
+ "<SPECIAL_215>",
222
+ "<SPECIAL_216>",
223
+ "<SPECIAL_217>",
224
+ "<SPECIAL_218>",
225
+ "<SPECIAL_219>",
226
+ "<SPECIAL_220>",
227
+ "<SPECIAL_221>",
228
+ "<SPECIAL_222>",
229
+ "<SPECIAL_223>",
230
+ "<SPECIAL_224>",
231
+ "<SPECIAL_225>",
232
+ "<SPECIAL_226>",
233
+ "<SPECIAL_227>",
234
+ "<SPECIAL_228>",
235
+ "<SPECIAL_229>",
236
+ "<SPECIAL_230>",
237
+ "<SPECIAL_231>",
238
+ "<SPECIAL_232>",
239
+ "<SPECIAL_233>",
240
+ "<SPECIAL_234>",
241
+ "<SPECIAL_235>",
242
+ "<SPECIAL_236>",
243
+ "<SPECIAL_237>",
244
+ "<SPECIAL_238>",
245
+ "<SPECIAL_239>",
246
+ "<SPECIAL_240>",
247
+ "<SPECIAL_241>",
248
+ "<SPECIAL_242>",
249
+ "<SPECIAL_243>",
250
+ "<SPECIAL_244>",
251
+ "<SPECIAL_245>",
252
+ "<SPECIAL_246>",
253
+ "<SPECIAL_247>",
254
+ "<SPECIAL_248>",
255
+ "<SPECIAL_249>",
256
+ "<SPECIAL_250>",
257
+ "<SPECIAL_251>",
258
+ "<SPECIAL_252>",
259
+ "<SPECIAL_253>",
260
+ "<SPECIAL_254>",
261
+ "<SPECIAL_255>",
262
+ "<SPECIAL_256>",
263
+ "<SPECIAL_257>",
264
+ "<SPECIAL_258>",
265
+ "<SPECIAL_259>",
266
+ "<SPECIAL_260>",
267
+ "<SPECIAL_261>",
268
+ "<SPECIAL_262>",
269
+ "<SPECIAL_263>",
270
+ "<SPECIAL_264>",
271
+ "<SPECIAL_265>",
272
+ "<SPECIAL_266>",
273
+ "<SPECIAL_267>",
274
+ "<SPECIAL_268>",
275
+ "<SPECIAL_269>",
276
+ "<SPECIAL_270>",
277
+ "<SPECIAL_271>",
278
+ "<SPECIAL_272>",
279
+ "<SPECIAL_273>",
280
+ "<SPECIAL_274>",
281
+ "<SPECIAL_275>",
282
+ "<SPECIAL_276>",
283
+ "<SPECIAL_277>",
284
+ "<SPECIAL_278>",
285
+ "<SPECIAL_279>",
286
+ "<SPECIAL_280>",
287
+ "<SPECIAL_281>",
288
+ "<SPECIAL_282>",
289
+ "<SPECIAL_283>",
290
+ "<SPECIAL_284>",
291
+ "<SPECIAL_285>",
292
+ "<SPECIAL_286>",
293
+ "<SPECIAL_287>",
294
+ "<SPECIAL_288>",
295
+ "<SPECIAL_289>",
296
+ "<SPECIAL_290>",
297
+ "<SPECIAL_291>",
298
+ "<SPECIAL_292>",
299
+ "<SPECIAL_293>",
300
+ "<SPECIAL_294>",
301
+ "<SPECIAL_295>",
302
+ "<SPECIAL_296>",
303
+ "<SPECIAL_297>",
304
+ "<SPECIAL_298>",
305
+ "<SPECIAL_299>",
306
+ "<SPECIAL_300>",
307
+ "<SPECIAL_301>",
308
+ "<SPECIAL_302>",
309
+ "<SPECIAL_303>",
310
+ "<SPECIAL_304>",
311
+ "<SPECIAL_305>",
312
+ "<SPECIAL_306>",
313
+ "<SPECIAL_307>",
314
+ "<SPECIAL_308>",
315
+ "<SPECIAL_309>",
316
+ "<SPECIAL_310>",
317
+ "<SPECIAL_311>",
318
+ "<SPECIAL_312>",
319
+ "<SPECIAL_313>",
320
+ "<SPECIAL_314>",
321
+ "<SPECIAL_315>",
322
+ "<SPECIAL_316>",
323
+ "<SPECIAL_317>",
324
+ "<SPECIAL_318>",
325
+ "<SPECIAL_319>",
326
+ "<SPECIAL_320>",
327
+ "<SPECIAL_321>",
328
+ "<SPECIAL_322>",
329
+ "<SPECIAL_323>",
330
+ "<SPECIAL_324>",
331
+ "<SPECIAL_325>",
332
+ "<SPECIAL_326>",
333
+ "<SPECIAL_327>",
334
+ "<SPECIAL_328>",
335
+ "<SPECIAL_329>",
336
+ "<SPECIAL_330>",
337
+ "<SPECIAL_331>",
338
+ "<SPECIAL_332>",
339
+ "<SPECIAL_333>",
340
+ "<SPECIAL_334>",
341
+ "<SPECIAL_335>",
342
+ "<SPECIAL_336>",
343
+ "<SPECIAL_337>",
344
+ "<SPECIAL_338>",
345
+ "<SPECIAL_339>",
346
+ "<SPECIAL_340>",
347
+ "<SPECIAL_341>",
348
+ "<SPECIAL_342>",
349
+ "<SPECIAL_343>",
350
+ "<SPECIAL_344>",
351
+ "<SPECIAL_345>",
352
+ "<SPECIAL_346>",
353
+ "<SPECIAL_347>",
354
+ "<SPECIAL_348>",
355
+ "<SPECIAL_349>",
356
+ "<SPECIAL_350>",
357
+ "<SPECIAL_351>",
358
+ "<SPECIAL_352>",
359
+ "<SPECIAL_353>",
360
+ "<SPECIAL_354>",
361
+ "<SPECIAL_355>",
362
+ "<SPECIAL_356>",
363
+ "<SPECIAL_357>",
364
+ "<SPECIAL_358>",
365
+ "<SPECIAL_359>",
366
+ "<SPECIAL_360>",
367
+ "<SPECIAL_361>",
368
+ "<SPECIAL_362>",
369
+ "<SPECIAL_363>",
370
+ "<SPECIAL_364>",
371
+ "<SPECIAL_365>",
372
+ "<SPECIAL_366>",
373
+ "<SPECIAL_367>",
374
+ "<SPECIAL_368>",
375
+ "<SPECIAL_369>",
376
+ "<SPECIAL_370>",
377
+ "<SPECIAL_371>",
378
+ "<SPECIAL_372>",
379
+ "<SPECIAL_373>",
380
+ "<SPECIAL_374>",
381
+ "<SPECIAL_375>",
382
+ "<SPECIAL_376>",
383
+ "<SPECIAL_377>",
384
+ "<SPECIAL_378>",
385
+ "<SPECIAL_379>",
386
+ "<SPECIAL_380>",
387
+ "<SPECIAL_381>",
388
+ "<SPECIAL_382>",
389
+ "<SPECIAL_383>",
390
+ "<SPECIAL_384>",
391
+ "<SPECIAL_385>",
392
+ "<SPECIAL_386>",
393
+ "<SPECIAL_387>",
394
+ "<SPECIAL_388>",
395
+ "<SPECIAL_389>",
396
+ "<SPECIAL_390>",
397
+ "<SPECIAL_391>",
398
+ "<SPECIAL_392>",
399
+ "<SPECIAL_393>",
400
+ "<SPECIAL_394>",
401
+ "<SPECIAL_395>",
402
+ "<SPECIAL_396>",
403
+ "<SPECIAL_397>",
404
+ "<SPECIAL_398>",
405
+ "<SPECIAL_399>",
406
+ "<SPECIAL_400>",
407
+ "<SPECIAL_401>",
408
+ "<SPECIAL_402>",
409
+ "<SPECIAL_403>",
410
+ "<SPECIAL_404>",
411
+ "<SPECIAL_405>",
412
+ "<SPECIAL_406>",
413
+ "<SPECIAL_407>",
414
+ "<SPECIAL_408>",
415
+ "<SPECIAL_409>",
416
+ "<SPECIAL_410>",
417
+ "<SPECIAL_411>",
418
+ "<SPECIAL_412>",
419
+ "<SPECIAL_413>",
420
+ "<SPECIAL_414>",
421
+ "<SPECIAL_415>",
422
+ "<SPECIAL_416>",
423
+ "<SPECIAL_417>",
424
+ "<SPECIAL_418>",
425
+ "<SPECIAL_419>",
426
+ "<SPECIAL_420>",
427
+ "<SPECIAL_421>",
428
+ "<SPECIAL_422>",
429
+ "<SPECIAL_423>",
430
+ "<SPECIAL_424>",
431
+ "<SPECIAL_425>",
432
+ "<SPECIAL_426>",
433
+ "<SPECIAL_427>",
434
+ "<SPECIAL_428>",
435
+ "<SPECIAL_429>",
436
+ "<SPECIAL_430>",
437
+ "<SPECIAL_431>",
438
+ "<SPECIAL_432>",
439
+ "<SPECIAL_433>",
440
+ "<SPECIAL_434>",
441
+ "<SPECIAL_435>",
442
+ "<SPECIAL_436>",
443
+ "<SPECIAL_437>",
444
+ "<SPECIAL_438>",
445
+ "<SPECIAL_439>",
446
+ "<SPECIAL_440>",
447
+ "<SPECIAL_441>",
448
+ "<SPECIAL_442>",
449
+ "<SPECIAL_443>",
450
+ "<SPECIAL_444>",
451
+ "<SPECIAL_445>",
452
+ "<SPECIAL_446>",
453
+ "<SPECIAL_447>",
454
+ "<SPECIAL_448>",
455
+ "<SPECIAL_449>",
456
+ "<SPECIAL_450>",
457
+ "<SPECIAL_451>",
458
+ "<SPECIAL_452>",
459
+ "<SPECIAL_453>",
460
+ "<SPECIAL_454>",
461
+ "<SPECIAL_455>",
462
+ "<SPECIAL_456>",
463
+ "<SPECIAL_457>",
464
+ "<SPECIAL_458>",
465
+ "<SPECIAL_459>",
466
+ "<SPECIAL_460>",
467
+ "<SPECIAL_461>",
468
+ "<SPECIAL_462>",
469
+ "<SPECIAL_463>",
470
+ "<SPECIAL_464>",
471
+ "<SPECIAL_465>",
472
+ "<SPECIAL_466>",
473
+ "<SPECIAL_467>",
474
+ "<SPECIAL_468>",
475
+ "<SPECIAL_469>",
476
+ "<SPECIAL_470>",
477
+ "<SPECIAL_471>",
478
+ "<SPECIAL_472>",
479
+ "<SPECIAL_473>",
480
+ "<SPECIAL_474>",
481
+ "<SPECIAL_475>",
482
+ "<SPECIAL_476>",
483
+ "<SPECIAL_477>",
484
+ "<SPECIAL_478>",
485
+ "<SPECIAL_479>",
486
+ "<SPECIAL_480>",
487
+ "<SPECIAL_481>",
488
+ "<SPECIAL_482>",
489
+ "<SPECIAL_483>",
490
+ "<SPECIAL_484>",
491
+ "<SPECIAL_485>",
492
+ "<SPECIAL_486>",
493
+ "<SPECIAL_487>",
494
+ "<SPECIAL_488>",
495
+ "<SPECIAL_489>",
496
+ "<SPECIAL_490>",
497
+ "<SPECIAL_491>",
498
+ "<SPECIAL_492>",
499
+ "<SPECIAL_493>",
500
+ "<SPECIAL_494>",
501
+ "<SPECIAL_495>",
502
+ "<SPECIAL_496>",
503
+ "<SPECIAL_497>",
504
+ "<SPECIAL_498>",
505
+ "<SPECIAL_499>",
506
+ "<SPECIAL_500>",
507
+ "<SPECIAL_501>",
508
+ "<SPECIAL_502>",
509
+ "<SPECIAL_503>",
510
+ "<SPECIAL_504>",
511
+ "<SPECIAL_505>",
512
+ "<SPECIAL_506>",
513
+ "<SPECIAL_507>",
514
+ "<SPECIAL_508>",
515
+ "<SPECIAL_509>",
516
+ "<SPECIAL_510>",
517
+ "<SPECIAL_511>",
518
+ "<SPECIAL_512>",
519
+ "<SPECIAL_513>",
520
+ "<SPECIAL_514>",
521
+ "<SPECIAL_515>",
522
+ "<SPECIAL_516>",
523
+ "<SPECIAL_517>",
524
+ "<SPECIAL_518>",
525
+ "<SPECIAL_519>",
526
+ "<SPECIAL_520>",
527
+ "<SPECIAL_521>",
528
+ "<SPECIAL_522>",
529
+ "<SPECIAL_523>",
530
+ "<SPECIAL_524>",
531
+ "<SPECIAL_525>",
532
+ "<SPECIAL_526>",
533
+ "<SPECIAL_527>",
534
+ "<SPECIAL_528>",
535
+ "<SPECIAL_529>",
536
+ "<SPECIAL_530>",
537
+ "<SPECIAL_531>",
538
+ "<SPECIAL_532>",
539
+ "<SPECIAL_533>",
540
+ "<SPECIAL_534>",
541
+ "<SPECIAL_535>",
542
+ "<SPECIAL_536>",
543
+ "<SPECIAL_537>",
544
+ "<SPECIAL_538>",
545
+ "<SPECIAL_539>",
546
+ "<SPECIAL_540>",
547
+ "<SPECIAL_541>",
548
+ "<SPECIAL_542>",
549
+ "<SPECIAL_543>",
550
+ "<SPECIAL_544>",
551
+ "<SPECIAL_545>",
552
+ "<SPECIAL_546>",
553
+ "<SPECIAL_547>",
554
+ "<SPECIAL_548>",
555
+ "<SPECIAL_549>",
556
+ "<SPECIAL_550>",
557
+ "<SPECIAL_551>",
558
+ "<SPECIAL_552>",
559
+ "<SPECIAL_553>",
560
+ "<SPECIAL_554>",
561
+ "<SPECIAL_555>",
562
+ "<SPECIAL_556>",
563
+ "<SPECIAL_557>",
564
+ "<SPECIAL_558>",
565
+ "<SPECIAL_559>",
566
+ "<SPECIAL_560>",
567
+ "<SPECIAL_561>",
568
+ "<SPECIAL_562>",
569
+ "<SPECIAL_563>",
570
+ "<SPECIAL_564>",
571
+ "<SPECIAL_565>",
572
+ "<SPECIAL_566>",
573
+ "<SPECIAL_567>",
574
+ "<SPECIAL_568>",
575
+ "<SPECIAL_569>",
576
+ "<SPECIAL_570>",
577
+ "<SPECIAL_571>",
578
+ "<SPECIAL_572>",
579
+ "<SPECIAL_573>",
580
+ "<SPECIAL_574>",
581
+ "<SPECIAL_575>",
582
+ "<SPECIAL_576>",
583
+ "<SPECIAL_577>",
584
+ "<SPECIAL_578>",
585
+ "<SPECIAL_579>",
586
+ "<SPECIAL_580>",
587
+ "<SPECIAL_581>",
588
+ "<SPECIAL_582>",
589
+ "<SPECIAL_583>",
590
+ "<SPECIAL_584>",
591
+ "<SPECIAL_585>",
592
+ "<SPECIAL_586>",
593
+ "<SPECIAL_587>",
594
+ "<SPECIAL_588>",
595
+ "<SPECIAL_589>",
596
+ "<SPECIAL_590>",
597
+ "<SPECIAL_591>",
598
+ "<SPECIAL_592>",
599
+ "<SPECIAL_593>",
600
+ "<SPECIAL_594>",
601
+ "<SPECIAL_595>",
602
+ "<SPECIAL_596>",
603
+ "<SPECIAL_597>",
604
+ "<SPECIAL_598>",
605
+ "<SPECIAL_599>",
606
+ "<SPECIAL_600>",
607
+ "<SPECIAL_601>",
608
+ "<SPECIAL_602>",
609
+ "<SPECIAL_603>",
610
+ "<SPECIAL_604>",
611
+ "<SPECIAL_605>",
612
+ "<SPECIAL_606>",
613
+ "<SPECIAL_607>",
614
+ "<SPECIAL_608>",
615
+ "<SPECIAL_609>",
616
+ "<SPECIAL_610>",
617
+ "<SPECIAL_611>",
618
+ "<SPECIAL_612>",
619
+ "<SPECIAL_613>",
620
+ "<SPECIAL_614>",
621
+ "<SPECIAL_615>",
622
+ "<SPECIAL_616>",
623
+ "<SPECIAL_617>",
624
+ "<SPECIAL_618>",
625
+ "<SPECIAL_619>",
626
+ "<SPECIAL_620>",
627
+ "<SPECIAL_621>",
628
+ "<SPECIAL_622>",
629
+ "<SPECIAL_623>",
630
+ "<SPECIAL_624>",
631
+ "<SPECIAL_625>",
632
+ "<SPECIAL_626>",
633
+ "<SPECIAL_627>",
634
+ "<SPECIAL_628>",
635
+ "<SPECIAL_629>",
636
+ "<SPECIAL_630>",
637
+ "<SPECIAL_631>",
638
+ "<SPECIAL_632>",
639
+ "<SPECIAL_633>",
640
+ "<SPECIAL_634>",
641
+ "<SPECIAL_635>",
642
+ "<SPECIAL_636>",
643
+ "<SPECIAL_637>",
644
+ "<SPECIAL_638>",
645
+ "<SPECIAL_639>",
646
+ "<SPECIAL_640>",
647
+ "<SPECIAL_641>",
648
+ "<SPECIAL_642>",
649
+ "<SPECIAL_643>",
650
+ "<SPECIAL_644>",
651
+ "<SPECIAL_645>",
652
+ "<SPECIAL_646>",
653
+ "<SPECIAL_647>",
654
+ "<SPECIAL_648>",
655
+ "<SPECIAL_649>",
656
+ "<SPECIAL_650>",
657
+ "<SPECIAL_651>",
658
+ "<SPECIAL_652>",
659
+ "<SPECIAL_653>",
660
+ "<SPECIAL_654>",
661
+ "<SPECIAL_655>",
662
+ "<SPECIAL_656>",
663
+ "<SPECIAL_657>",
664
+ "<SPECIAL_658>",
665
+ "<SPECIAL_659>",
666
+ "<SPECIAL_660>",
667
+ "<SPECIAL_661>",
668
+ "<SPECIAL_662>",
669
+ "<SPECIAL_663>",
670
+ "<SPECIAL_664>",
671
+ "<SPECIAL_665>",
672
+ "<SPECIAL_666>",
673
+ "<SPECIAL_667>",
674
+ "<SPECIAL_668>",
675
+ "<SPECIAL_669>",
676
+ "<SPECIAL_670>",
677
+ "<SPECIAL_671>",
678
+ "<SPECIAL_672>",
679
+ "<SPECIAL_673>",
680
+ "<SPECIAL_674>",
681
+ "<SPECIAL_675>",
682
+ "<SPECIAL_676>",
683
+ "<SPECIAL_677>",
684
+ "<SPECIAL_678>",
685
+ "<SPECIAL_679>",
686
+ "<SPECIAL_680>",
687
+ "<SPECIAL_681>",
688
+ "<SPECIAL_682>",
689
+ "<SPECIAL_683>",
690
+ "<SPECIAL_684>",
691
+ "<SPECIAL_685>",
692
+ "<SPECIAL_686>",
693
+ "<SPECIAL_687>",
694
+ "<SPECIAL_688>",
695
+ "<SPECIAL_689>",
696
+ "<SPECIAL_690>",
697
+ "<SPECIAL_691>",
698
+ "<SPECIAL_692>",
699
+ "<SPECIAL_693>",
700
+ "<SPECIAL_694>",
701
+ "<SPECIAL_695>",
702
+ "<SPECIAL_696>",
703
+ "<SPECIAL_697>",
704
+ "<SPECIAL_698>",
705
+ "<SPECIAL_699>",
706
+ "<SPECIAL_700>",
707
+ "<SPECIAL_701>",
708
+ "<SPECIAL_702>",
709
+ "<SPECIAL_703>",
710
+ "<SPECIAL_704>",
711
+ "<SPECIAL_705>",
712
+ "<SPECIAL_706>",
713
+ "<SPECIAL_707>",
714
+ "<SPECIAL_708>",
715
+ "<SPECIAL_709>",
716
+ "<SPECIAL_710>",
717
+ "<SPECIAL_711>",
718
+ "<SPECIAL_712>",
719
+ "<SPECIAL_713>",
720
+ "<SPECIAL_714>",
721
+ "<SPECIAL_715>",
722
+ "<SPECIAL_716>",
723
+ "<SPECIAL_717>",
724
+ "<SPECIAL_718>",
725
+ "<SPECIAL_719>",
726
+ "<SPECIAL_720>",
727
+ "<SPECIAL_721>",
728
+ "<SPECIAL_722>",
729
+ "<SPECIAL_723>",
730
+ "<SPECIAL_724>",
731
+ "<SPECIAL_725>",
732
+ "<SPECIAL_726>",
733
+ "<SPECIAL_727>",
734
+ "<SPECIAL_728>",
735
+ "<SPECIAL_729>",
736
+ "<SPECIAL_730>",
737
+ "<SPECIAL_731>",
738
+ "<SPECIAL_732>",
739
+ "<SPECIAL_733>",
740
+ "<SPECIAL_734>",
741
+ "<SPECIAL_735>",
742
+ "<SPECIAL_736>",
743
+ "<SPECIAL_737>",
744
+ "<SPECIAL_738>",
745
+ "<SPECIAL_739>",
746
+ "<SPECIAL_740>",
747
+ "<SPECIAL_741>",
748
+ "<SPECIAL_742>",
749
+ "<SPECIAL_743>",
750
+ "<SPECIAL_744>",
751
+ "<SPECIAL_745>",
752
+ "<SPECIAL_746>",
753
+ "<SPECIAL_747>",
754
+ "<SPECIAL_748>",
755
+ "<SPECIAL_749>",
756
+ "<SPECIAL_750>",
757
+ "<SPECIAL_751>",
758
+ "<SPECIAL_752>",
759
+ "<SPECIAL_753>",
760
+ "<SPECIAL_754>",
761
+ "<SPECIAL_755>",
762
+ "<SPECIAL_756>",
763
+ "<SPECIAL_757>",
764
+ "<SPECIAL_758>",
765
+ "<SPECIAL_759>",
766
+ "<SPECIAL_760>",
767
+ "<SPECIAL_761>",
768
+ "<SPECIAL_762>",
769
+ "<SPECIAL_763>",
770
+ "<SPECIAL_764>",
771
+ "<SPECIAL_765>",
772
+ "<SPECIAL_766>",
773
+ "<SPECIAL_767>",
774
+ "<SPECIAL_768>",
775
+ "<SPECIAL_769>",
776
+ "<SPECIAL_770>",
777
+ "<SPECIAL_771>",
778
+ "<SPECIAL_772>",
779
+ "<SPECIAL_773>",
780
+ "<SPECIAL_774>",
781
+ "<SPECIAL_775>",
782
+ "<SPECIAL_776>",
783
+ "<SPECIAL_777>",
784
+ "<SPECIAL_778>",
785
+ "<SPECIAL_779>",
786
+ "<SPECIAL_780>",
787
+ "<SPECIAL_781>",
788
+ "<SPECIAL_782>",
789
+ "<SPECIAL_783>",
790
+ "<SPECIAL_784>",
791
+ "<SPECIAL_785>",
792
+ "<SPECIAL_786>",
793
+ "<SPECIAL_787>",
794
+ "<SPECIAL_788>",
795
+ "<SPECIAL_789>",
796
+ "<SPECIAL_790>",
797
+ "<SPECIAL_791>",
798
+ "<SPECIAL_792>",
799
+ "<SPECIAL_793>",
800
+ "<SPECIAL_794>",
801
+ "<SPECIAL_795>",
802
+ "<SPECIAL_796>",
803
+ "<SPECIAL_797>",
804
+ "<SPECIAL_798>",
805
+ "<SPECIAL_799>",
806
+ "<SPECIAL_800>",
807
+ "<SPECIAL_801>",
808
+ "<SPECIAL_802>",
809
+ "<SPECIAL_803>",
810
+ "<SPECIAL_804>",
811
+ "<SPECIAL_805>",
812
+ "<SPECIAL_806>",
813
+ "<SPECIAL_807>",
814
+ "<SPECIAL_808>",
815
+ "<SPECIAL_809>",
816
+ "<SPECIAL_810>",
817
+ "<SPECIAL_811>",
818
+ "<SPECIAL_812>",
819
+ "<SPECIAL_813>",
820
+ "<SPECIAL_814>",
821
+ "<SPECIAL_815>",
822
+ "<SPECIAL_816>",
823
+ "<SPECIAL_817>",
824
+ "<SPECIAL_818>",
825
+ "<SPECIAL_819>",
826
+ "<SPECIAL_820>",
827
+ "<SPECIAL_821>",
828
+ "<SPECIAL_822>",
829
+ "<SPECIAL_823>",
830
+ "<SPECIAL_824>",
831
+ "<SPECIAL_825>",
832
+ "<SPECIAL_826>",
833
+ "<SPECIAL_827>",
834
+ "<SPECIAL_828>",
835
+ "<SPECIAL_829>",
836
+ "<SPECIAL_830>",
837
+ "<SPECIAL_831>",
838
+ "<SPECIAL_832>",
839
+ "<SPECIAL_833>",
840
+ "<SPECIAL_834>",
841
+ "<SPECIAL_835>",
842
+ "<SPECIAL_836>",
843
+ "<SPECIAL_837>",
844
+ "<SPECIAL_838>",
845
+ "<SPECIAL_839>",
846
+ "<SPECIAL_840>",
847
+ "<SPECIAL_841>",
848
+ "<SPECIAL_842>",
849
+ "<SPECIAL_843>",
850
+ "<SPECIAL_844>",
851
+ "<SPECIAL_845>",
852
+ "<SPECIAL_846>",
853
+ "<SPECIAL_847>",
854
+ "<SPECIAL_848>",
855
+ "<SPECIAL_849>",
856
+ "<SPECIAL_850>",
857
+ "<SPECIAL_851>",
858
+ "<SPECIAL_852>",
859
+ "<SPECIAL_853>",
860
+ "<SPECIAL_854>",
861
+ "<SPECIAL_855>",
862
+ "<SPECIAL_856>",
863
+ "<SPECIAL_857>",
864
+ "<SPECIAL_858>",
865
+ "<SPECIAL_859>",
866
+ "<SPECIAL_860>",
867
+ "<SPECIAL_861>",
868
+ "<SPECIAL_862>",
869
+ "<SPECIAL_863>",
870
+ "<SPECIAL_864>",
871
+ "<SPECIAL_865>",
872
+ "<SPECIAL_866>",
873
+ "<SPECIAL_867>",
874
+ "<SPECIAL_868>",
875
+ "<SPECIAL_869>",
876
+ "<SPECIAL_870>",
877
+ "<SPECIAL_871>",
878
+ "<SPECIAL_872>",
879
+ "<SPECIAL_873>",
880
+ "<SPECIAL_874>",
881
+ "<SPECIAL_875>",
882
+ "<SPECIAL_876>",
883
+ "<SPECIAL_877>",
884
+ "<SPECIAL_878>",
885
+ "<SPECIAL_879>",
886
+ "<SPECIAL_880>",
887
+ "<SPECIAL_881>",
888
+ "<SPECIAL_882>",
889
+ "<SPECIAL_883>",
890
+ "<SPECIAL_884>",
891
+ "<SPECIAL_885>",
892
+ "<SPECIAL_886>",
893
+ "<SPECIAL_887>",
894
+ "<SPECIAL_888>",
895
+ "<SPECIAL_889>",
896
+ "<SPECIAL_890>",
897
+ "<SPECIAL_891>",
898
+ "<SPECIAL_892>",
899
+ "<SPECIAL_893>",
900
+ "<SPECIAL_894>",
901
+ "<SPECIAL_895>",
902
+ "<SPECIAL_896>",
903
+ "<SPECIAL_897>",
904
+ "<SPECIAL_898>",
905
+ "<SPECIAL_899>",
906
+ "<SPECIAL_900>",
907
+ "<SPECIAL_901>",
908
+ "<SPECIAL_902>",
909
+ "<SPECIAL_903>",
910
+ "<SPECIAL_904>",
911
+ "<SPECIAL_905>",
912
+ "<SPECIAL_906>",
913
+ "<SPECIAL_907>",
914
+ "<SPECIAL_908>",
915
+ "<SPECIAL_909>",
916
+ "<SPECIAL_910>",
917
+ "<SPECIAL_911>",
918
+ "<SPECIAL_912>",
919
+ "<SPECIAL_913>",
920
+ "<SPECIAL_914>",
921
+ "<SPECIAL_915>",
922
+ "<SPECIAL_916>",
923
+ "<SPECIAL_917>",
924
+ "<SPECIAL_918>",
925
+ "<SPECIAL_919>",
926
+ "<SPECIAL_920>",
927
+ "<SPECIAL_921>",
928
+ "<SPECIAL_922>",
929
+ "<SPECIAL_923>",
930
+ "<SPECIAL_924>",
931
+ "<SPECIAL_925>",
932
+ "<SPECIAL_926>",
933
+ "<SPECIAL_927>",
934
+ "<SPECIAL_928>",
935
+ "<SPECIAL_929>",
936
+ "<SPECIAL_930>",
937
+ "<SPECIAL_931>",
938
+ "<SPECIAL_932>",
939
+ "<SPECIAL_933>",
940
+ "<SPECIAL_934>",
941
+ "<SPECIAL_935>",
942
+ "<SPECIAL_936>",
943
+ "<SPECIAL_937>",
944
+ "<SPECIAL_938>",
945
+ "<SPECIAL_939>",
946
+ "<SPECIAL_940>",
947
+ "<SPECIAL_941>",
948
+ "<SPECIAL_942>",
949
+ "<SPECIAL_943>",
950
+ "<SPECIAL_944>",
951
+ "<SPECIAL_945>",
952
+ "<SPECIAL_946>",
953
+ "<SPECIAL_947>",
954
+ "<SPECIAL_948>",
955
+ "<SPECIAL_949>",
956
+ "<SPECIAL_950>",
957
+ "<SPECIAL_951>",
958
+ "<SPECIAL_952>",
959
+ "<SPECIAL_953>",
960
+ "<SPECIAL_954>",
961
+ "<SPECIAL_955>",
962
+ "<SPECIAL_956>",
963
+ "<SPECIAL_957>",
964
+ "<SPECIAL_958>",
965
+ "<SPECIAL_959>",
966
+ "<SPECIAL_960>",
967
+ "<SPECIAL_961>",
968
+ "<SPECIAL_962>",
969
+ "<SPECIAL_963>",
970
+ "<SPECIAL_964>",
971
+ "<SPECIAL_965>",
972
+ "<SPECIAL_966>",
973
+ "<SPECIAL_967>",
974
+ "<SPECIAL_968>",
975
+ "<SPECIAL_969>",
976
+ "<SPECIAL_970>",
977
+ "<SPECIAL_971>",
978
+ "<SPECIAL_972>",
979
+ "<SPECIAL_973>",
980
+ "<SPECIAL_974>",
981
+ "<SPECIAL_975>",
982
+ "<SPECIAL_976>",
983
+ "<SPECIAL_977>",
984
+ "<SPECIAL_978>",
985
+ "<SPECIAL_979>",
986
+ "<SPECIAL_980>",
987
+ "<SPECIAL_981>",
988
+ "<SPECIAL_982>",
989
+ "<SPECIAL_983>",
990
+ "<SPECIAL_984>",
991
+ "<SPECIAL_985>",
992
+ "<SPECIAL_986>",
993
+ "<SPECIAL_987>",
994
+ "<SPECIAL_988>",
995
+ "<SPECIAL_989>",
996
+ "<SPECIAL_990>",
997
+ "<SPECIAL_991>",
998
+ "<SPECIAL_992>",
999
+ "<SPECIAL_993>",
1000
+ "<SPECIAL_994>",
1001
+ "<SPECIAL_995>",
1002
+ "<SPECIAL_996>",
1003
+ "<SPECIAL_997>",
1004
+ "<SPECIAL_998>",
1005
+ "<SPECIAL_999>"
1006
+ ],
1007
+ "is_local": true,
1008
+ "model_max_length": 1000000000000000019884624838656,
1009
+ "pad_token": "<pad>",
1010
+ "processor_class": "PixtralProcessor",
1011
+ "tokenizer_class": "TokenizersBackend",
1012
+ "unk_token": "<unk>"
1013
+ }
cpt_devstral_24B/best_adapter/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f62526ec2433add7ac031c48b1f6ff360f1ade77275765112cbf7cf361d64ca5
3
+ size 5201
cpt_devstral_24B/checkpoints/checkpoint-400/README.md ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: /workspace/Models/Devstral-Small-2-24B-Instruct-2512
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:/workspace/Models/Devstral-Small-2-24B-Instruct-2512
7
+ - lora
8
+ - transformers
9
+ ---
10
+
11
+ # Model Card for Model ID
12
+
13
+ <!-- Provide a quick summary of what the model is/does. -->
14
+
15
+
16
+
17
+ ## Model Details
18
+
19
+ ### Model Description
20
+
21
+ <!-- Provide a longer summary of what this model is. -->
22
+
23
+
24
+
25
+ - **Developed by:** [More Information Needed]
26
+ - **Funded by [optional]:** [More Information Needed]
27
+ - **Shared by [optional]:** [More Information Needed]
28
+ - **Model type:** [More Information Needed]
29
+ - **Language(s) (NLP):** [More Information Needed]
30
+ - **License:** [More Information Needed]
31
+ - **Finetuned from model [optional]:** [More Information Needed]
32
+
33
+ ### Model Sources [optional]
34
+
35
+ <!-- Provide the basic links for the model. -->
36
+
37
+ - **Repository:** [More Information Needed]
38
+ - **Paper [optional]:** [More Information Needed]
39
+ - **Demo [optional]:** [More Information Needed]
40
+
41
+ ## Uses
42
+
43
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
44
+
45
+ ### Direct Use
46
+
47
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
48
+
49
+ [More Information Needed]
50
+
51
+ ### Downstream Use [optional]
52
+
53
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
54
+
55
+ [More Information Needed]
56
+
57
+ ### Out-of-Scope Use
58
+
59
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
60
+
61
+ [More Information Needed]
62
+
63
+ ## Bias, Risks, and Limitations
64
+
65
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
66
+
67
+ [More Information Needed]
68
+
69
+ ### Recommendations
70
+
71
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
72
+
73
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
74
+
75
+ ## How to Get Started with the Model
76
+
77
+ Use the code below to get started with the model.
78
+
79
+ [More Information Needed]
80
+
81
+ ## Training Details
82
+
83
+ ### Training Data
84
+
85
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
86
+
87
+ [More Information Needed]
88
+
89
+ ### Training Procedure
90
+
91
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
92
+
93
+ #### Preprocessing [optional]
94
+
95
+ [More Information Needed]
96
+
97
+
98
+ #### Training Hyperparameters
99
+
100
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
101
+
102
+ #### Speeds, Sizes, Times [optional]
103
+
104
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
105
+
106
+ [More Information Needed]
107
+
108
+ ## Evaluation
109
+
110
+ <!-- This section describes the evaluation protocols and provides the results. -->
111
+
112
+ ### Testing Data, Factors & Metrics
113
+
114
+ #### Testing Data
115
+
116
+ <!-- This should link to a Dataset Card if possible. -->
117
+
118
+ [More Information Needed]
119
+
120
+ #### Factors
121
+
122
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
123
+
124
+ [More Information Needed]
125
+
126
+ #### Metrics
127
+
128
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
129
+
130
+ [More Information Needed]
131
+
132
+ ### Results
133
+
134
+ [More Information Needed]
135
+
136
+ #### Summary
137
+
138
+
139
+
140
+ ## Model Examination [optional]
141
+
142
+ <!-- Relevant interpretability work for the model goes here -->
143
+
144
+ [More Information Needed]
145
+
146
+ ## Environmental Impact
147
+
148
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
149
+
150
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
151
+
152
+ - **Hardware Type:** [More Information Needed]
153
+ - **Hours used:** [More Information Needed]
154
+ - **Cloud Provider:** [More Information Needed]
155
+ - **Compute Region:** [More Information Needed]
156
+ - **Carbon Emitted:** [More Information Needed]
157
+
158
+ ## Technical Specifications [optional]
159
+
160
+ ### Model Architecture and Objective
161
+
162
+ [More Information Needed]
163
+
164
+ ### Compute Infrastructure
165
+
166
+ [More Information Needed]
167
+
168
+ #### Hardware
169
+
170
+ [More Information Needed]
171
+
172
+ #### Software
173
+
174
+ [More Information Needed]
175
+
176
+ ## Citation [optional]
177
+
178
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
179
+
180
+ **BibTeX:**
181
+
182
+ [More Information Needed]
183
+
184
+ **APA:**
185
+
186
+ [More Information Needed]
187
+
188
+ ## Glossary [optional]
189
+
190
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
191
+
192
+ [More Information Needed]
193
+
194
+ ## More Information [optional]
195
+
196
+ [More Information Needed]
197
+
198
+ ## Model Card Authors [optional]
199
+
200
+ [More Information Needed]
201
+
202
+ ## Model Card Contact
203
+
204
+ [More Information Needed]
205
+ ### Framework versions
206
+
207
+ - PEFT 0.18.0
cpt_devstral_24B/checkpoints/checkpoint-400/adapter_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "/workspace/Models/Devstral-Small-2-24B-Instruct-2512",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 128,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.0",
27
+ "qalora_group_size": 16,
28
+ "r": 64,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "k_proj",
33
+ "o_proj",
34
+ "q_proj",
35
+ "v_proj"
36
+ ],
37
+ "target_parameters": null,
38
+ "task_type": "CAUSAL_LM",
39
+ "trainable_token_indices": null,
40
+ "use_dora": false,
41
+ "use_qalora": false,
42
+ "use_rslora": false
43
+ }
cpt_devstral_24B/checkpoints/checkpoint-400/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb99e750772d0be3eae91b1278f180702595f9e801cdcfe108166e6afe96e5ca
3
+ size 364983848
cpt_devstral_24B/checkpoints/checkpoint-400/chat_template.jinja ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {#- Default system message if no system prompt is passed. #}
2
+ {%- set default_system_message = '' %}
3
+
4
+ {#- Begin of sequence token. #}
5
+ {{- bos_token }}
6
+
7
+ {#- Handle system prompt if it exists. #}
8
+ {#- System prompt supports text content or text chunks. #}
9
+ {%- if messages[0]['role'] == 'system' %}
10
+ {{- '[SYSTEM_PROMPT]' -}}
11
+ {%- if messages[0]['content'] is string %}
12
+ {{- messages[0]['content'] -}}
13
+ {%- else %}
14
+ {%- for block in messages[0]['content'] %}
15
+ {%- if block['type'] == 'text' %}
16
+ {{- block['text'] }}
17
+ {%- else %}
18
+ {{- raise_exception('Only text chunks are supported in system message contents.') }}
19
+ {%- endif %}
20
+ {%- endfor %}
21
+ {%- endif %}
22
+ {{- '[/SYSTEM_PROMPT]' -}}
23
+ {%- set loop_messages = messages[1:] %}
24
+ {%- else %}
25
+ {%- set loop_messages = messages %}
26
+ {%- if default_system_message != '' %}
27
+ {{- '[SYSTEM_PROMPT]' + default_system_message + '[/SYSTEM_PROMPT]' }}
28
+ {%- endif %}
29
+ {%- endif %}
30
+
31
+
32
+ {#- Tools definition #}
33
+ {%- set tools_definition = '' %}
34
+ {%- set has_tools = false %}
35
+ {%- if tools is defined and tools is not none and tools|length > 0 %}
36
+ {%- set has_tools = true %}
37
+ {%- set tools_definition = '[AVAILABLE_TOOLS]' + (tools| tojson) + '[/AVAILABLE_TOOLS]' %}
38
+ {{- tools_definition }}
39
+ {%- endif %}
40
+
41
+ {#- Checks for alternating user/assistant messages. #}
42
+ {%- set ns = namespace(index=0) %}
43
+ {%- for message in loop_messages %}
44
+ {%- if message.role == 'user' or (message.role == 'assistant' and (message.tool_calls is not defined or message.tool_calls is none or message.tool_calls | length == 0)) %}
45
+ {%- if (message['role'] == 'user') != (ns.index % 2 == 0) %}
46
+ {{- raise_exception('After the optional system message, conversation roles must alternate user and assistant roles except for tool calls and results.') }}
47
+ {%- endif %}
48
+ {%- set ns.index = ns.index + 1 %}
49
+ {%- endif %}
50
+ {%- endfor %}
51
+
52
+ {#- Handle conversation messages. #}
53
+ {%- for message in loop_messages %}
54
+
55
+ {#- User messages support text content or text and image chunks. #}
56
+ {%- if message['role'] == 'user' %}
57
+ {%- if message['content'] is string %}
58
+ {{- '[INST]' + message['content'] + '[/INST]' }}
59
+ {%- elif message['content'] | length > 0 %}
60
+ {{- '[INST]' }}
61
+ {%- if message['content'] | length == 2 %}
62
+ {%- set blocks = message['content'] | sort(attribute='type') %}
63
+ {%- else %}
64
+ {%- set blocks = message['content'] %}
65
+ {%- endif %}
66
+ {%- for block in blocks %}
67
+ {%- if block['type'] == 'text' %}
68
+ {{- block['text'] }}
69
+ {%- elif block['type'] in ['image', 'image_url'] %}
70
+ {{- '[IMG]' }}
71
+ {%- else %}
72
+ {{- raise_exception('Only text, image and image_url chunks are supported in user message content.') }}
73
+ {%- endif %}
74
+ {%- endfor %}
75
+ {{- '[/INST]' }}
76
+ {%- else %}
77
+ {{- raise_exception('User message must have a string or a list of chunks in content') }}
78
+ {%- endif %}
79
+
80
+ {#- Assistant messages support text content or text chunks, and optionally tool calls. #}
81
+ {%- elif message['role'] == 'assistant' %}
82
+ {%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}
83
+ {{- raise_exception('Assistant message must have a string or a list of chunks in content or a list of tool calls.') }}
84
+ {%- endif %}
85
+
86
+ {%- if message['content'] is string %}
87
+ {{- message['content'] }}
88
+ {%- elif message['content'] | length > 0 %}
89
+ {%- for block in message['content'] %}
90
+ {%- if block['type'] == 'text' %}
91
+ {{- block['text'] }}
92
+ {%- else %}
93
+ {{- raise_exception('Only text chunks are supported in assistant message contents.') }}
94
+ {%- endif %}
95
+ {%- endfor %}
96
+ {%- endif %}
97
+
98
+ {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 %}
99
+ {%- for tool in message['tool_calls'] %}
100
+ {%- set arguments = tool['function']['arguments'] %}
101
+ {%- if arguments is not string %}
102
+ {%- set arguments = arguments|tojson|safe %}
103
+ {%- elif arguments == '' %}
104
+ {%- set arguments = '{}' %}
105
+ {%- endif %}
106
+ {{- '[TOOL_CALLS]' + tool['function']['name'] + '[ARGS]' + arguments }}
107
+ {%- endfor %}
108
+ {%- endif %}
109
+
110
+ {#- End of sequence token after each assistant message. #}
111
+ {{- eos_token }}
112
+
113
+ {#- Tool messages only support text content. #}
114
+ {%- elif message['role'] == 'tool' %}
115
+ {{- '[TOOL_RESULTS]' + message['content']|string + '[/TOOL_RESULTS]' }}
116
+
117
+ {#- Raise exception for unsupported roles. #}
118
+ {%- else %}
119
+ {{- raise_exception('Only user, assistant and tool roles are supported, got ' + message['role'] + '.') }}
120
+ {%- endif %}
121
+ {%- endfor %}
cpt_devstral_24B/checkpoints/checkpoint-400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5539bb24f0e90b59b5feaa90ce48faf2a89fef6e84d938cfdb015b096793c9e
3
+ size 160131559
cpt_devstral_24B/checkpoints/checkpoint-400/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c636bd3acde73735b158db1a7551369c5642650cac64756dc42008fea4a8a41c
3
+ size 14645
cpt_devstral_24B/checkpoints/checkpoint-400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63227fb4710085463616ae42dc93bbd119bc402348d37ec9f6ab60b0d130235e
3
+ size 1465
cpt_devstral_24B/checkpoints/checkpoint-400/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286acad9b0e27fce778ac429763536accf618ccb6ed72963b6f94685e531c5c7
3
+ size 17077402
cpt_devstral_24B/checkpoints/checkpoint-400/tokenizer_config.json ADDED
@@ -0,0 +1,1013 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "extra_special_tokens": [
6
+ "<unk>",
7
+ "<s>",
8
+ "</s>",
9
+ "[INST]",
10
+ "[/INST]",
11
+ "[AVAILABLE_TOOLS]",
12
+ "[/AVAILABLE_TOOLS]",
13
+ "[TOOL_RESULTS]",
14
+ "[/TOOL_RESULTS]",
15
+ "[TOOL_CALLS]",
16
+ "[IMG]",
17
+ "<pad>",
18
+ "[IMG_BREAK]",
19
+ "[IMG_END]",
20
+ "[PREFIX]",
21
+ "[MIDDLE]",
22
+ "[SUFFIX]",
23
+ "[SYSTEM_PROMPT]",
24
+ "[/SYSTEM_PROMPT]",
25
+ "[TOOL_CONTENT]",
26
+ "<SPECIAL_20>",
27
+ "<SPECIAL_21>",
28
+ "<SPECIAL_22>",
29
+ "<SPECIAL_23>",
30
+ "[AUDIO]",
31
+ "[BEGIN_AUDIO]",
32
+ "<SPECIAL_26>",
33
+ "<SPECIAL_27>",
34
+ "<SPECIAL_28>",
35
+ "<SPECIAL_29>",
36
+ "<SPECIAL_30>",
37
+ "<SPECIAL_31>",
38
+ "[ARGS]",
39
+ "[CALL_ID]",
40
+ "[THINK]",
41
+ "[/THINK]",
42
+ "<SPECIAL_36>",
43
+ "<SPECIAL_37>",
44
+ "<SPECIAL_38>",
45
+ "<SPECIAL_39>",
46
+ "<SPECIAL_40>",
47
+ "<SPECIAL_41>",
48
+ "<SPECIAL_42>",
49
+ "<SPECIAL_43>",
50
+ "<SPECIAL_44>",
51
+ "<SPECIAL_45>",
52
+ "<SPECIAL_46>",
53
+ "<SPECIAL_47>",
54
+ "<SPECIAL_48>",
55
+ "<SPECIAL_49>",
56
+ "<SPECIAL_50>",
57
+ "<SPECIAL_51>",
58
+ "<SPECIAL_52>",
59
+ "<SPECIAL_53>",
60
+ "<SPECIAL_54>",
61
+ "<SPECIAL_55>",
62
+ "<SPECIAL_56>",
63
+ "<SPECIAL_57>",
64
+ "<SPECIAL_58>",
65
+ "<SPECIAL_59>",
66
+ "<SPECIAL_60>",
67
+ "<SPECIAL_61>",
68
+ "<SPECIAL_62>",
69
+ "<SPECIAL_63>",
70
+ "<SPECIAL_64>",
71
+ "<SPECIAL_65>",
72
+ "<SPECIAL_66>",
73
+ "<SPECIAL_67>",
74
+ "<SPECIAL_68>",
75
+ "<SPECIAL_69>",
76
+ "<SPECIAL_70>",
77
+ "<SPECIAL_71>",
78
+ "<SPECIAL_72>",
79
+ "<SPECIAL_73>",
80
+ "<SPECIAL_74>",
81
+ "<SPECIAL_75>",
82
+ "<SPECIAL_76>",
83
+ "<SPECIAL_77>",
84
+ "<SPECIAL_78>",
85
+ "<SPECIAL_79>",
86
+ "<SPECIAL_80>",
87
+ "<SPECIAL_81>",
88
+ "<SPECIAL_82>",
89
+ "<SPECIAL_83>",
90
+ "<SPECIAL_84>",
91
+ "<SPECIAL_85>",
92
+ "<SPECIAL_86>",
93
+ "<SPECIAL_87>",
94
+ "<SPECIAL_88>",
95
+ "<SPECIAL_89>",
96
+ "<SPECIAL_90>",
97
+ "<SPECIAL_91>",
98
+ "<SPECIAL_92>",
99
+ "<SPECIAL_93>",
100
+ "<SPECIAL_94>",
101
+ "<SPECIAL_95>",
102
+ "<SPECIAL_96>",
103
+ "<SPECIAL_97>",
104
+ "<SPECIAL_98>",
105
+ "<SPECIAL_99>",
106
+ "<SPECIAL_100>",
107
+ "<SPECIAL_101>",
108
+ "<SPECIAL_102>",
109
+ "<SPECIAL_103>",
110
+ "<SPECIAL_104>",
111
+ "<SPECIAL_105>",
112
+ "<SPECIAL_106>",
113
+ "<SPECIAL_107>",
114
+ "<SPECIAL_108>",
115
+ "<SPECIAL_109>",
116
+ "<SPECIAL_110>",
117
+ "<SPECIAL_111>",
118
+ "<SPECIAL_112>",
119
+ "<SPECIAL_113>",
120
+ "<SPECIAL_114>",
121
+ "<SPECIAL_115>",
122
+ "<SPECIAL_116>",
123
+ "<SPECIAL_117>",
124
+ "<SPECIAL_118>",
125
+ "<SPECIAL_119>",
126
+ "<SPECIAL_120>",
127
+ "<SPECIAL_121>",
128
+ "<SPECIAL_122>",
129
+ "<SPECIAL_123>",
130
+ "<SPECIAL_124>",
131
+ "<SPECIAL_125>",
132
+ "<SPECIAL_126>",
133
+ "<SPECIAL_127>",
134
+ "<SPECIAL_128>",
135
+ "<SPECIAL_129>",
136
+ "<SPECIAL_130>",
137
+ "<SPECIAL_131>",
138
+ "<SPECIAL_132>",
139
+ "<SPECIAL_133>",
140
+ "<SPECIAL_134>",
141
+ "<SPECIAL_135>",
142
+ "<SPECIAL_136>",
143
+ "<SPECIAL_137>",
144
+ "<SPECIAL_138>",
145
+ "<SPECIAL_139>",
146
+ "<SPECIAL_140>",
147
+ "<SPECIAL_141>",
148
+ "<SPECIAL_142>",
149
+ "<SPECIAL_143>",
150
+ "<SPECIAL_144>",
151
+ "<SPECIAL_145>",
152
+ "<SPECIAL_146>",
153
+ "<SPECIAL_147>",
154
+ "<SPECIAL_148>",
155
+ "<SPECIAL_149>",
156
+ "<SPECIAL_150>",
157
+ "<SPECIAL_151>",
158
+ "<SPECIAL_152>",
159
+ "<SPECIAL_153>",
160
+ "<SPECIAL_154>",
161
+ "<SPECIAL_155>",
162
+ "<SPECIAL_156>",
163
+ "<SPECIAL_157>",
164
+ "<SPECIAL_158>",
165
+ "<SPECIAL_159>",
166
+ "<SPECIAL_160>",
167
+ "<SPECIAL_161>",
168
+ "<SPECIAL_162>",
169
+ "<SPECIAL_163>",
170
+ "<SPECIAL_164>",
171
+ "<SPECIAL_165>",
172
+ "<SPECIAL_166>",
173
+ "<SPECIAL_167>",
174
+ "<SPECIAL_168>",
175
+ "<SPECIAL_169>",
176
+ "<SPECIAL_170>",
177
+ "<SPECIAL_171>",
178
+ "<SPECIAL_172>",
179
+ "<SPECIAL_173>",
180
+ "<SPECIAL_174>",
181
+ "<SPECIAL_175>",
182
+ "<SPECIAL_176>",
183
+ "<SPECIAL_177>",
184
+ "<SPECIAL_178>",
185
+ "<SPECIAL_179>",
186
+ "<SPECIAL_180>",
187
+ "<SPECIAL_181>",
188
+ "<SPECIAL_182>",
189
+ "<SPECIAL_183>",
190
+ "<SPECIAL_184>",
191
+ "<SPECIAL_185>",
192
+ "<SPECIAL_186>",
193
+ "<SPECIAL_187>",
194
+ "<SPECIAL_188>",
195
+ "<SPECIAL_189>",
196
+ "<SPECIAL_190>",
197
+ "<SPECIAL_191>",
198
+ "<SPECIAL_192>",
199
+ "<SPECIAL_193>",
200
+ "<SPECIAL_194>",
201
+ "<SPECIAL_195>",
202
+ "<SPECIAL_196>",
203
+ "<SPECIAL_197>",
204
+ "<SPECIAL_198>",
205
+ "<SPECIAL_199>",
206
+ "<SPECIAL_200>",
207
+ "<SPECIAL_201>",
208
+ "<SPECIAL_202>",
209
+ "<SPECIAL_203>",
210
+ "<SPECIAL_204>",
211
+ "<SPECIAL_205>",
212
+ "<SPECIAL_206>",
213
+ "<SPECIAL_207>",
214
+ "<SPECIAL_208>",
215
+ "<SPECIAL_209>",
216
+ "<SPECIAL_210>",
217
+ "<SPECIAL_211>",
218
+ "<SPECIAL_212>",
219
+ "<SPECIAL_213>",
220
+ "<SPECIAL_214>",
221
+ "<SPECIAL_215>",
222
+ "<SPECIAL_216>",
223
+ "<SPECIAL_217>",
224
+ "<SPECIAL_218>",
225
+ "<SPECIAL_219>",
226
+ "<SPECIAL_220>",
227
+ "<SPECIAL_221>",
228
+ "<SPECIAL_222>",
229
+ "<SPECIAL_223>",
230
+ "<SPECIAL_224>",
231
+ "<SPECIAL_225>",
232
+ "<SPECIAL_226>",
233
+ "<SPECIAL_227>",
234
+ "<SPECIAL_228>",
235
+ "<SPECIAL_229>",
236
+ "<SPECIAL_230>",
237
+ "<SPECIAL_231>",
238
+ "<SPECIAL_232>",
239
+ "<SPECIAL_233>",
240
+ "<SPECIAL_234>",
241
+ "<SPECIAL_235>",
242
+ "<SPECIAL_236>",
243
+ "<SPECIAL_237>",
244
+ "<SPECIAL_238>",
245
+ "<SPECIAL_239>",
246
+ "<SPECIAL_240>",
247
+ "<SPECIAL_241>",
248
+ "<SPECIAL_242>",
249
+ "<SPECIAL_243>",
250
+ "<SPECIAL_244>",
251
+ "<SPECIAL_245>",
252
+ "<SPECIAL_246>",
253
+ "<SPECIAL_247>",
254
+ "<SPECIAL_248>",
255
+ "<SPECIAL_249>",
256
+ "<SPECIAL_250>",
257
+ "<SPECIAL_251>",
258
+ "<SPECIAL_252>",
259
+ "<SPECIAL_253>",
260
+ "<SPECIAL_254>",
261
+ "<SPECIAL_255>",
262
+ "<SPECIAL_256>",
263
+ "<SPECIAL_257>",
264
+ "<SPECIAL_258>",
265
+ "<SPECIAL_259>",
266
+ "<SPECIAL_260>",
267
+ "<SPECIAL_261>",
268
+ "<SPECIAL_262>",
269
+ "<SPECIAL_263>",
270
+ "<SPECIAL_264>",
271
+ "<SPECIAL_265>",
272
+ "<SPECIAL_266>",
273
+ "<SPECIAL_267>",
274
+ "<SPECIAL_268>",
275
+ "<SPECIAL_269>",
276
+ "<SPECIAL_270>",
277
+ "<SPECIAL_271>",
278
+ "<SPECIAL_272>",
279
+ "<SPECIAL_273>",
280
+ "<SPECIAL_274>",
281
+ "<SPECIAL_275>",
282
+ "<SPECIAL_276>",
283
+ "<SPECIAL_277>",
284
+ "<SPECIAL_278>",
285
+ "<SPECIAL_279>",
286
+ "<SPECIAL_280>",
287
+ "<SPECIAL_281>",
288
+ "<SPECIAL_282>",
289
+ "<SPECIAL_283>",
290
+ "<SPECIAL_284>",
291
+ "<SPECIAL_285>",
292
+ "<SPECIAL_286>",
293
+ "<SPECIAL_287>",
294
+ "<SPECIAL_288>",
295
+ "<SPECIAL_289>",
296
+ "<SPECIAL_290>",
297
+ "<SPECIAL_291>",
298
+ "<SPECIAL_292>",
299
+ "<SPECIAL_293>",
300
+ "<SPECIAL_294>",
301
+ "<SPECIAL_295>",
302
+ "<SPECIAL_296>",
303
+ "<SPECIAL_297>",
304
+ "<SPECIAL_298>",
305
+ "<SPECIAL_299>",
306
+ "<SPECIAL_300>",
307
+ "<SPECIAL_301>",
308
+ "<SPECIAL_302>",
309
+ "<SPECIAL_303>",
310
+ "<SPECIAL_304>",
311
+ "<SPECIAL_305>",
312
+ "<SPECIAL_306>",
313
+ "<SPECIAL_307>",
314
+ "<SPECIAL_308>",
315
+ "<SPECIAL_309>",
316
+ "<SPECIAL_310>",
317
+ "<SPECIAL_311>",
318
+ "<SPECIAL_312>",
319
+ "<SPECIAL_313>",
320
+ "<SPECIAL_314>",
321
+ "<SPECIAL_315>",
322
+ "<SPECIAL_316>",
323
+ "<SPECIAL_317>",
324
+ "<SPECIAL_318>",
325
+ "<SPECIAL_319>",
326
+ "<SPECIAL_320>",
327
+ "<SPECIAL_321>",
328
+ "<SPECIAL_322>",
329
+ "<SPECIAL_323>",
330
+ "<SPECIAL_324>",
331
+ "<SPECIAL_325>",
332
+ "<SPECIAL_326>",
333
+ "<SPECIAL_327>",
334
+ "<SPECIAL_328>",
335
+ "<SPECIAL_329>",
336
+ "<SPECIAL_330>",
337
+ "<SPECIAL_331>",
338
+ "<SPECIAL_332>",
339
+ "<SPECIAL_333>",
340
+ "<SPECIAL_334>",
341
+ "<SPECIAL_335>",
342
+ "<SPECIAL_336>",
343
+ "<SPECIAL_337>",
344
+ "<SPECIAL_338>",
345
+ "<SPECIAL_339>",
346
+ "<SPECIAL_340>",
347
+ "<SPECIAL_341>",
348
+ "<SPECIAL_342>",
349
+ "<SPECIAL_343>",
350
+ "<SPECIAL_344>",
351
+ "<SPECIAL_345>",
352
+ "<SPECIAL_346>",
353
+ "<SPECIAL_347>",
354
+ "<SPECIAL_348>",
355
+ "<SPECIAL_349>",
356
+ "<SPECIAL_350>",
357
+ "<SPECIAL_351>",
358
+ "<SPECIAL_352>",
359
+ "<SPECIAL_353>",
360
+ "<SPECIAL_354>",
361
+ "<SPECIAL_355>",
362
+ "<SPECIAL_356>",
363
+ "<SPECIAL_357>",
364
+ "<SPECIAL_358>",
365
+ "<SPECIAL_359>",
366
+ "<SPECIAL_360>",
367
+ "<SPECIAL_361>",
368
+ "<SPECIAL_362>",
369
+ "<SPECIAL_363>",
370
+ "<SPECIAL_364>",
371
+ "<SPECIAL_365>",
372
+ "<SPECIAL_366>",
373
+ "<SPECIAL_367>",
374
+ "<SPECIAL_368>",
375
+ "<SPECIAL_369>",
376
+ "<SPECIAL_370>",
377
+ "<SPECIAL_371>",
378
+ "<SPECIAL_372>",
379
+ "<SPECIAL_373>",
380
+ "<SPECIAL_374>",
381
+ "<SPECIAL_375>",
382
+ "<SPECIAL_376>",
383
+ "<SPECIAL_377>",
384
+ "<SPECIAL_378>",
385
+ "<SPECIAL_379>",
386
+ "<SPECIAL_380>",
387
+ "<SPECIAL_381>",
388
+ "<SPECIAL_382>",
389
+ "<SPECIAL_383>",
390
+ "<SPECIAL_384>",
391
+ "<SPECIAL_385>",
392
+ "<SPECIAL_386>",
393
+ "<SPECIAL_387>",
394
+ "<SPECIAL_388>",
395
+ "<SPECIAL_389>",
396
+ "<SPECIAL_390>",
397
+ "<SPECIAL_391>",
398
+ "<SPECIAL_392>",
399
+ "<SPECIAL_393>",
400
+ "<SPECIAL_394>",
401
+ "<SPECIAL_395>",
402
+ "<SPECIAL_396>",
403
+ "<SPECIAL_397>",
404
+ "<SPECIAL_398>",
405
+ "<SPECIAL_399>",
406
+ "<SPECIAL_400>",
407
+ "<SPECIAL_401>",
408
+ "<SPECIAL_402>",
409
+ "<SPECIAL_403>",
410
+ "<SPECIAL_404>",
411
+ "<SPECIAL_405>",
412
+ "<SPECIAL_406>",
413
+ "<SPECIAL_407>",
414
+ "<SPECIAL_408>",
415
+ "<SPECIAL_409>",
416
+ "<SPECIAL_410>",
417
+ "<SPECIAL_411>",
418
+ "<SPECIAL_412>",
419
+ "<SPECIAL_413>",
420
+ "<SPECIAL_414>",
421
+ "<SPECIAL_415>",
422
+ "<SPECIAL_416>",
423
+ "<SPECIAL_417>",
424
+ "<SPECIAL_418>",
425
+ "<SPECIAL_419>",
426
+ "<SPECIAL_420>",
427
+ "<SPECIAL_421>",
428
+ "<SPECIAL_422>",
429
+ "<SPECIAL_423>",
430
+ "<SPECIAL_424>",
431
+ "<SPECIAL_425>",
432
+ "<SPECIAL_426>",
433
+ "<SPECIAL_427>",
434
+ "<SPECIAL_428>",
435
+ "<SPECIAL_429>",
436
+ "<SPECIAL_430>",
437
+ "<SPECIAL_431>",
438
+ "<SPECIAL_432>",
439
+ "<SPECIAL_433>",
440
+ "<SPECIAL_434>",
441
+ "<SPECIAL_435>",
442
+ "<SPECIAL_436>",
443
+ "<SPECIAL_437>",
444
+ "<SPECIAL_438>",
445
+ "<SPECIAL_439>",
446
+ "<SPECIAL_440>",
447
+ "<SPECIAL_441>",
448
+ "<SPECIAL_442>",
449
+ "<SPECIAL_443>",
450
+ "<SPECIAL_444>",
451
+ "<SPECIAL_445>",
452
+ "<SPECIAL_446>",
453
+ "<SPECIAL_447>",
454
+ "<SPECIAL_448>",
455
+ "<SPECIAL_449>",
456
+ "<SPECIAL_450>",
457
+ "<SPECIAL_451>",
458
+ "<SPECIAL_452>",
459
+ "<SPECIAL_453>",
460
+ "<SPECIAL_454>",
461
+ "<SPECIAL_455>",
462
+ "<SPECIAL_456>",
463
+ "<SPECIAL_457>",
464
+ "<SPECIAL_458>",
465
+ "<SPECIAL_459>",
466
+ "<SPECIAL_460>",
467
+ "<SPECIAL_461>",
468
+ "<SPECIAL_462>",
469
+ "<SPECIAL_463>",
470
+ "<SPECIAL_464>",
471
+ "<SPECIAL_465>",
472
+ "<SPECIAL_466>",
473
+ "<SPECIAL_467>",
474
+ "<SPECIAL_468>",
475
+ "<SPECIAL_469>",
476
+ "<SPECIAL_470>",
477
+ "<SPECIAL_471>",
478
+ "<SPECIAL_472>",
479
+ "<SPECIAL_473>",
480
+ "<SPECIAL_474>",
481
+ "<SPECIAL_475>",
482
+ "<SPECIAL_476>",
483
+ "<SPECIAL_477>",
484
+ "<SPECIAL_478>",
485
+ "<SPECIAL_479>",
486
+ "<SPECIAL_480>",
487
+ "<SPECIAL_481>",
488
+ "<SPECIAL_482>",
489
+ "<SPECIAL_483>",
490
+ "<SPECIAL_484>",
491
+ "<SPECIAL_485>",
492
+ "<SPECIAL_486>",
493
+ "<SPECIAL_487>",
494
+ "<SPECIAL_488>",
495
+ "<SPECIAL_489>",
496
+ "<SPECIAL_490>",
497
+ "<SPECIAL_491>",
498
+ "<SPECIAL_492>",
499
+ "<SPECIAL_493>",
500
+ "<SPECIAL_494>",
501
+ "<SPECIAL_495>",
502
+ "<SPECIAL_496>",
503
+ "<SPECIAL_497>",
504
+ "<SPECIAL_498>",
505
+ "<SPECIAL_499>",
506
+ "<SPECIAL_500>",
507
+ "<SPECIAL_501>",
508
+ "<SPECIAL_502>",
509
+ "<SPECIAL_503>",
510
+ "<SPECIAL_504>",
511
+ "<SPECIAL_505>",
512
+ "<SPECIAL_506>",
513
+ "<SPECIAL_507>",
514
+ "<SPECIAL_508>",
515
+ "<SPECIAL_509>",
516
+ "<SPECIAL_510>",
517
+ "<SPECIAL_511>",
518
+ "<SPECIAL_512>",
519
+ "<SPECIAL_513>",
520
+ "<SPECIAL_514>",
521
+ "<SPECIAL_515>",
522
+ "<SPECIAL_516>",
523
+ "<SPECIAL_517>",
524
+ "<SPECIAL_518>",
525
+ "<SPECIAL_519>",
526
+ "<SPECIAL_520>",
527
+ "<SPECIAL_521>",
528
+ "<SPECIAL_522>",
529
+ "<SPECIAL_523>",
530
+ "<SPECIAL_524>",
531
+ "<SPECIAL_525>",
532
+ "<SPECIAL_526>",
533
+ "<SPECIAL_527>",
534
+ "<SPECIAL_528>",
535
+ "<SPECIAL_529>",
536
+ "<SPECIAL_530>",
537
+ "<SPECIAL_531>",
538
+ "<SPECIAL_532>",
539
+ "<SPECIAL_533>",
540
+ "<SPECIAL_534>",
541
+ "<SPECIAL_535>",
542
+ "<SPECIAL_536>",
543
+ "<SPECIAL_537>",
544
+ "<SPECIAL_538>",
545
+ "<SPECIAL_539>",
546
+ "<SPECIAL_540>",
547
+ "<SPECIAL_541>",
548
+ "<SPECIAL_542>",
549
+ "<SPECIAL_543>",
550
+ "<SPECIAL_544>",
551
+ "<SPECIAL_545>",
552
+ "<SPECIAL_546>",
553
+ "<SPECIAL_547>",
554
+ "<SPECIAL_548>",
555
+ "<SPECIAL_549>",
556
+ "<SPECIAL_550>",
557
+ "<SPECIAL_551>",
558
+ "<SPECIAL_552>",
559
+ "<SPECIAL_553>",
560
+ "<SPECIAL_554>",
561
+ "<SPECIAL_555>",
562
+ "<SPECIAL_556>",
563
+ "<SPECIAL_557>",
564
+ "<SPECIAL_558>",
565
+ "<SPECIAL_559>",
566
+ "<SPECIAL_560>",
567
+ "<SPECIAL_561>",
568
+ "<SPECIAL_562>",
569
+ "<SPECIAL_563>",
570
+ "<SPECIAL_564>",
571
+ "<SPECIAL_565>",
572
+ "<SPECIAL_566>",
573
+ "<SPECIAL_567>",
574
+ "<SPECIAL_568>",
575
+ "<SPECIAL_569>",
576
+ "<SPECIAL_570>",
577
+ "<SPECIAL_571>",
578
+ "<SPECIAL_572>",
579
+ "<SPECIAL_573>",
580
+ "<SPECIAL_574>",
581
+ "<SPECIAL_575>",
582
+ "<SPECIAL_576>",
583
+ "<SPECIAL_577>",
584
+ "<SPECIAL_578>",
585
+ "<SPECIAL_579>",
586
+ "<SPECIAL_580>",
587
+ "<SPECIAL_581>",
588
+ "<SPECIAL_582>",
589
+ "<SPECIAL_583>",
590
+ "<SPECIAL_584>",
591
+ "<SPECIAL_585>",
592
+ "<SPECIAL_586>",
593
+ "<SPECIAL_587>",
594
+ "<SPECIAL_588>",
595
+ "<SPECIAL_589>",
596
+ "<SPECIAL_590>",
597
+ "<SPECIAL_591>",
598
+ "<SPECIAL_592>",
599
+ "<SPECIAL_593>",
600
+ "<SPECIAL_594>",
601
+ "<SPECIAL_595>",
602
+ "<SPECIAL_596>",
603
+ "<SPECIAL_597>",
604
+ "<SPECIAL_598>",
605
+ "<SPECIAL_599>",
606
+ "<SPECIAL_600>",
607
+ "<SPECIAL_601>",
608
+ "<SPECIAL_602>",
609
+ "<SPECIAL_603>",
610
+ "<SPECIAL_604>",
611
+ "<SPECIAL_605>",
612
+ "<SPECIAL_606>",
613
+ "<SPECIAL_607>",
614
+ "<SPECIAL_608>",
615
+ "<SPECIAL_609>",
616
+ "<SPECIAL_610>",
617
+ "<SPECIAL_611>",
618
+ "<SPECIAL_612>",
619
+ "<SPECIAL_613>",
620
+ "<SPECIAL_614>",
621
+ "<SPECIAL_615>",
622
+ "<SPECIAL_616>",
623
+ "<SPECIAL_617>",
624
+ "<SPECIAL_618>",
625
+ "<SPECIAL_619>",
626
+ "<SPECIAL_620>",
627
+ "<SPECIAL_621>",
628
+ "<SPECIAL_622>",
629
+ "<SPECIAL_623>",
630
+ "<SPECIAL_624>",
631
+ "<SPECIAL_625>",
632
+ "<SPECIAL_626>",
633
+ "<SPECIAL_627>",
634
+ "<SPECIAL_628>",
635
+ "<SPECIAL_629>",
636
+ "<SPECIAL_630>",
637
+ "<SPECIAL_631>",
638
+ "<SPECIAL_632>",
639
+ "<SPECIAL_633>",
640
+ "<SPECIAL_634>",
641
+ "<SPECIAL_635>",
642
+ "<SPECIAL_636>",
643
+ "<SPECIAL_637>",
644
+ "<SPECIAL_638>",
645
+ "<SPECIAL_639>",
646
+ "<SPECIAL_640>",
647
+ "<SPECIAL_641>",
648
+ "<SPECIAL_642>",
649
+ "<SPECIAL_643>",
650
+ "<SPECIAL_644>",
651
+ "<SPECIAL_645>",
652
+ "<SPECIAL_646>",
653
+ "<SPECIAL_647>",
654
+ "<SPECIAL_648>",
655
+ "<SPECIAL_649>",
656
+ "<SPECIAL_650>",
657
+ "<SPECIAL_651>",
658
+ "<SPECIAL_652>",
659
+ "<SPECIAL_653>",
660
+ "<SPECIAL_654>",
661
+ "<SPECIAL_655>",
662
+ "<SPECIAL_656>",
663
+ "<SPECIAL_657>",
664
+ "<SPECIAL_658>",
665
+ "<SPECIAL_659>",
666
+ "<SPECIAL_660>",
667
+ "<SPECIAL_661>",
668
+ "<SPECIAL_662>",
669
+ "<SPECIAL_663>",
670
+ "<SPECIAL_664>",
671
+ "<SPECIAL_665>",
672
+ "<SPECIAL_666>",
673
+ "<SPECIAL_667>",
674
+ "<SPECIAL_668>",
675
+ "<SPECIAL_669>",
676
+ "<SPECIAL_670>",
677
+ "<SPECIAL_671>",
678
+ "<SPECIAL_672>",
679
+ "<SPECIAL_673>",
680
+ "<SPECIAL_674>",
681
+ "<SPECIAL_675>",
682
+ "<SPECIAL_676>",
683
+ "<SPECIAL_677>",
684
+ "<SPECIAL_678>",
685
+ "<SPECIAL_679>",
686
+ "<SPECIAL_680>",
687
+ "<SPECIAL_681>",
688
+ "<SPECIAL_682>",
689
+ "<SPECIAL_683>",
690
+ "<SPECIAL_684>",
691
+ "<SPECIAL_685>",
692
+ "<SPECIAL_686>",
693
+ "<SPECIAL_687>",
694
+ "<SPECIAL_688>",
695
+ "<SPECIAL_689>",
696
+ "<SPECIAL_690>",
697
+ "<SPECIAL_691>",
698
+ "<SPECIAL_692>",
699
+ "<SPECIAL_693>",
700
+ "<SPECIAL_694>",
701
+ "<SPECIAL_695>",
702
+ "<SPECIAL_696>",
703
+ "<SPECIAL_697>",
704
+ "<SPECIAL_698>",
705
+ "<SPECIAL_699>",
706
+ "<SPECIAL_700>",
707
+ "<SPECIAL_701>",
708
+ "<SPECIAL_702>",
709
+ "<SPECIAL_703>",
710
+ "<SPECIAL_704>",
711
+ "<SPECIAL_705>",
712
+ "<SPECIAL_706>",
713
+ "<SPECIAL_707>",
714
+ "<SPECIAL_708>",
715
+ "<SPECIAL_709>",
716
+ "<SPECIAL_710>",
717
+ "<SPECIAL_711>",
718
+ "<SPECIAL_712>",
719
+ "<SPECIAL_713>",
720
+ "<SPECIAL_714>",
721
+ "<SPECIAL_715>",
722
+ "<SPECIAL_716>",
723
+ "<SPECIAL_717>",
724
+ "<SPECIAL_718>",
725
+ "<SPECIAL_719>",
726
+ "<SPECIAL_720>",
727
+ "<SPECIAL_721>",
728
+ "<SPECIAL_722>",
729
+ "<SPECIAL_723>",
730
+ "<SPECIAL_724>",
731
+ "<SPECIAL_725>",
732
+ "<SPECIAL_726>",
733
+ "<SPECIAL_727>",
734
+ "<SPECIAL_728>",
735
+ "<SPECIAL_729>",
736
+ "<SPECIAL_730>",
737
+ "<SPECIAL_731>",
738
+ "<SPECIAL_732>",
739
+ "<SPECIAL_733>",
740
+ "<SPECIAL_734>",
741
+ "<SPECIAL_735>",
742
+ "<SPECIAL_736>",
743
+ "<SPECIAL_737>",
744
+ "<SPECIAL_738>",
745
+ "<SPECIAL_739>",
746
+ "<SPECIAL_740>",
747
+ "<SPECIAL_741>",
748
+ "<SPECIAL_742>",
749
+ "<SPECIAL_743>",
750
+ "<SPECIAL_744>",
751
+ "<SPECIAL_745>",
752
+ "<SPECIAL_746>",
753
+ "<SPECIAL_747>",
754
+ "<SPECIAL_748>",
755
+ "<SPECIAL_749>",
756
+ "<SPECIAL_750>",
757
+ "<SPECIAL_751>",
758
+ "<SPECIAL_752>",
759
+ "<SPECIAL_753>",
760
+ "<SPECIAL_754>",
761
+ "<SPECIAL_755>",
762
+ "<SPECIAL_756>",
763
+ "<SPECIAL_757>",
764
+ "<SPECIAL_758>",
765
+ "<SPECIAL_759>",
766
+ "<SPECIAL_760>",
767
+ "<SPECIAL_761>",
768
+ "<SPECIAL_762>",
769
+ "<SPECIAL_763>",
770
+ "<SPECIAL_764>",
771
+ "<SPECIAL_765>",
772
+ "<SPECIAL_766>",
773
+ "<SPECIAL_767>",
774
+ "<SPECIAL_768>",
775
+ "<SPECIAL_769>",
776
+ "<SPECIAL_770>",
777
+ "<SPECIAL_771>",
778
+ "<SPECIAL_772>",
779
+ "<SPECIAL_773>",
780
+ "<SPECIAL_774>",
781
+ "<SPECIAL_775>",
782
+ "<SPECIAL_776>",
783
+ "<SPECIAL_777>",
784
+ "<SPECIAL_778>",
785
+ "<SPECIAL_779>",
786
+ "<SPECIAL_780>",
787
+ "<SPECIAL_781>",
788
+ "<SPECIAL_782>",
789
+ "<SPECIAL_783>",
790
+ "<SPECIAL_784>",
791
+ "<SPECIAL_785>",
792
+ "<SPECIAL_786>",
793
+ "<SPECIAL_787>",
794
+ "<SPECIAL_788>",
795
+ "<SPECIAL_789>",
796
+ "<SPECIAL_790>",
797
+ "<SPECIAL_791>",
798
+ "<SPECIAL_792>",
799
+ "<SPECIAL_793>",
800
+ "<SPECIAL_794>",
801
+ "<SPECIAL_795>",
802
+ "<SPECIAL_796>",
803
+ "<SPECIAL_797>",
804
+ "<SPECIAL_798>",
805
+ "<SPECIAL_799>",
806
+ "<SPECIAL_800>",
807
+ "<SPECIAL_801>",
808
+ "<SPECIAL_802>",
809
+ "<SPECIAL_803>",
810
+ "<SPECIAL_804>",
811
+ "<SPECIAL_805>",
812
+ "<SPECIAL_806>",
813
+ "<SPECIAL_807>",
814
+ "<SPECIAL_808>",
815
+ "<SPECIAL_809>",
816
+ "<SPECIAL_810>",
817
+ "<SPECIAL_811>",
818
+ "<SPECIAL_812>",
819
+ "<SPECIAL_813>",
820
+ "<SPECIAL_814>",
821
+ "<SPECIAL_815>",
822
+ "<SPECIAL_816>",
823
+ "<SPECIAL_817>",
824
+ "<SPECIAL_818>",
825
+ "<SPECIAL_819>",
826
+ "<SPECIAL_820>",
827
+ "<SPECIAL_821>",
828
+ "<SPECIAL_822>",
829
+ "<SPECIAL_823>",
830
+ "<SPECIAL_824>",
831
+ "<SPECIAL_825>",
832
+ "<SPECIAL_826>",
833
+ "<SPECIAL_827>",
834
+ "<SPECIAL_828>",
835
+ "<SPECIAL_829>",
836
+ "<SPECIAL_830>",
837
+ "<SPECIAL_831>",
838
+ "<SPECIAL_832>",
839
+ "<SPECIAL_833>",
840
+ "<SPECIAL_834>",
841
+ "<SPECIAL_835>",
842
+ "<SPECIAL_836>",
843
+ "<SPECIAL_837>",
844
+ "<SPECIAL_838>",
845
+ "<SPECIAL_839>",
846
+ "<SPECIAL_840>",
847
+ "<SPECIAL_841>",
848
+ "<SPECIAL_842>",
849
+ "<SPECIAL_843>",
850
+ "<SPECIAL_844>",
851
+ "<SPECIAL_845>",
852
+ "<SPECIAL_846>",
853
+ "<SPECIAL_847>",
854
+ "<SPECIAL_848>",
855
+ "<SPECIAL_849>",
856
+ "<SPECIAL_850>",
857
+ "<SPECIAL_851>",
858
+ "<SPECIAL_852>",
859
+ "<SPECIAL_853>",
860
+ "<SPECIAL_854>",
861
+ "<SPECIAL_855>",
862
+ "<SPECIAL_856>",
863
+ "<SPECIAL_857>",
864
+ "<SPECIAL_858>",
865
+ "<SPECIAL_859>",
866
+ "<SPECIAL_860>",
867
+ "<SPECIAL_861>",
868
+ "<SPECIAL_862>",
869
+ "<SPECIAL_863>",
870
+ "<SPECIAL_864>",
871
+ "<SPECIAL_865>",
872
+ "<SPECIAL_866>",
873
+ "<SPECIAL_867>",
874
+ "<SPECIAL_868>",
875
+ "<SPECIAL_869>",
876
+ "<SPECIAL_870>",
877
+ "<SPECIAL_871>",
878
+ "<SPECIAL_872>",
879
+ "<SPECIAL_873>",
880
+ "<SPECIAL_874>",
881
+ "<SPECIAL_875>",
882
+ "<SPECIAL_876>",
883
+ "<SPECIAL_877>",
884
+ "<SPECIAL_878>",
885
+ "<SPECIAL_879>",
886
+ "<SPECIAL_880>",
887
+ "<SPECIAL_881>",
888
+ "<SPECIAL_882>",
889
+ "<SPECIAL_883>",
890
+ "<SPECIAL_884>",
891
+ "<SPECIAL_885>",
892
+ "<SPECIAL_886>",
893
+ "<SPECIAL_887>",
894
+ "<SPECIAL_888>",
895
+ "<SPECIAL_889>",
896
+ "<SPECIAL_890>",
897
+ "<SPECIAL_891>",
898
+ "<SPECIAL_892>",
899
+ "<SPECIAL_893>",
900
+ "<SPECIAL_894>",
901
+ "<SPECIAL_895>",
902
+ "<SPECIAL_896>",
903
+ "<SPECIAL_897>",
904
+ "<SPECIAL_898>",
905
+ "<SPECIAL_899>",
906
+ "<SPECIAL_900>",
907
+ "<SPECIAL_901>",
908
+ "<SPECIAL_902>",
909
+ "<SPECIAL_903>",
910
+ "<SPECIAL_904>",
911
+ "<SPECIAL_905>",
912
+ "<SPECIAL_906>",
913
+ "<SPECIAL_907>",
914
+ "<SPECIAL_908>",
915
+ "<SPECIAL_909>",
916
+ "<SPECIAL_910>",
917
+ "<SPECIAL_911>",
918
+ "<SPECIAL_912>",
919
+ "<SPECIAL_913>",
920
+ "<SPECIAL_914>",
921
+ "<SPECIAL_915>",
922
+ "<SPECIAL_916>",
923
+ "<SPECIAL_917>",
924
+ "<SPECIAL_918>",
925
+ "<SPECIAL_919>",
926
+ "<SPECIAL_920>",
927
+ "<SPECIAL_921>",
928
+ "<SPECIAL_922>",
929
+ "<SPECIAL_923>",
930
+ "<SPECIAL_924>",
931
+ "<SPECIAL_925>",
932
+ "<SPECIAL_926>",
933
+ "<SPECIAL_927>",
934
+ "<SPECIAL_928>",
935
+ "<SPECIAL_929>",
936
+ "<SPECIAL_930>",
937
+ "<SPECIAL_931>",
938
+ "<SPECIAL_932>",
939
+ "<SPECIAL_933>",
940
+ "<SPECIAL_934>",
941
+ "<SPECIAL_935>",
942
+ "<SPECIAL_936>",
943
+ "<SPECIAL_937>",
944
+ "<SPECIAL_938>",
945
+ "<SPECIAL_939>",
946
+ "<SPECIAL_940>",
947
+ "<SPECIAL_941>",
948
+ "<SPECIAL_942>",
949
+ "<SPECIAL_943>",
950
+ "<SPECIAL_944>",
951
+ "<SPECIAL_945>",
952
+ "<SPECIAL_946>",
953
+ "<SPECIAL_947>",
954
+ "<SPECIAL_948>",
955
+ "<SPECIAL_949>",
956
+ "<SPECIAL_950>",
957
+ "<SPECIAL_951>",
958
+ "<SPECIAL_952>",
959
+ "<SPECIAL_953>",
960
+ "<SPECIAL_954>",
961
+ "<SPECIAL_955>",
962
+ "<SPECIAL_956>",
963
+ "<SPECIAL_957>",
964
+ "<SPECIAL_958>",
965
+ "<SPECIAL_959>",
966
+ "<SPECIAL_960>",
967
+ "<SPECIAL_961>",
968
+ "<SPECIAL_962>",
969
+ "<SPECIAL_963>",
970
+ "<SPECIAL_964>",
971
+ "<SPECIAL_965>",
972
+ "<SPECIAL_966>",
973
+ "<SPECIAL_967>",
974
+ "<SPECIAL_968>",
975
+ "<SPECIAL_969>",
976
+ "<SPECIAL_970>",
977
+ "<SPECIAL_971>",
978
+ "<SPECIAL_972>",
979
+ "<SPECIAL_973>",
980
+ "<SPECIAL_974>",
981
+ "<SPECIAL_975>",
982
+ "<SPECIAL_976>",
983
+ "<SPECIAL_977>",
984
+ "<SPECIAL_978>",
985
+ "<SPECIAL_979>",
986
+ "<SPECIAL_980>",
987
+ "<SPECIAL_981>",
988
+ "<SPECIAL_982>",
989
+ "<SPECIAL_983>",
990
+ "<SPECIAL_984>",
991
+ "<SPECIAL_985>",
992
+ "<SPECIAL_986>",
993
+ "<SPECIAL_987>",
994
+ "<SPECIAL_988>",
995
+ "<SPECIAL_989>",
996
+ "<SPECIAL_990>",
997
+ "<SPECIAL_991>",
998
+ "<SPECIAL_992>",
999
+ "<SPECIAL_993>",
1000
+ "<SPECIAL_994>",
1001
+ "<SPECIAL_995>",
1002
+ "<SPECIAL_996>",
1003
+ "<SPECIAL_997>",
1004
+ "<SPECIAL_998>",
1005
+ "<SPECIAL_999>"
1006
+ ],
1007
+ "is_local": true,
1008
+ "model_max_length": 1000000000000000019884624838656,
1009
+ "pad_token": "<pad>",
1010
+ "processor_class": "PixtralProcessor",
1011
+ "tokenizer_class": "TokenizersBackend",
1012
+ "unk_token": "<unk>"
1013
+ }
cpt_devstral_24B/checkpoints/checkpoint-400/trainer_state.json ADDED
@@ -0,0 +1,2898 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 400,
3
+ "best_metric": 0.4318464398384094,
4
+ "best_model_checkpoint": "runs/cpt_run_v1/checkpoints/checkpoint-400",
5
+ "epoch": 1.1662716499544212,
6
+ "eval_steps": 50,
7
+ "global_step": 400,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0029170464904284413,
14
+ "grad_norm": 1.1577509641647339,
15
+ "learning_rate": 0.0,
16
+ "loss": 0.9893555045127869,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 0.005834092980856883,
21
+ "grad_norm": 0.9491796493530273,
22
+ "learning_rate": 2.8985507246376816e-07,
23
+ "loss": 0.8791205883026123,
24
+ "step": 2
25
+ },
26
+ {
27
+ "epoch": 0.008751139471285323,
28
+ "grad_norm": 1.1600768566131592,
29
+ "learning_rate": 5.797101449275363e-07,
30
+ "loss": 0.9858248233795166,
31
+ "step": 3
32
+ },
33
+ {
34
+ "epoch": 0.011668185961713765,
35
+ "grad_norm": 1.2298306226730347,
36
+ "learning_rate": 8.695652173913044e-07,
37
+ "loss": 1.0516364574432373,
38
+ "step": 4
39
+ },
40
+ {
41
+ "epoch": 0.014585232452142206,
42
+ "grad_norm": 0.9520533680915833,
43
+ "learning_rate": 1.1594202898550726e-06,
44
+ "loss": 0.8392249345779419,
45
+ "step": 5
46
+ },
47
+ {
48
+ "epoch": 0.017502278942570646,
49
+ "grad_norm": 1.2451188564300537,
50
+ "learning_rate": 1.4492753623188408e-06,
51
+ "loss": 1.0955077409744263,
52
+ "step": 6
53
+ },
54
+ {
55
+ "epoch": 0.02041932543299909,
56
+ "grad_norm": 1.1123991012573242,
57
+ "learning_rate": 1.7391304347826088e-06,
58
+ "loss": 0.9201866388320923,
59
+ "step": 7
60
+ },
61
+ {
62
+ "epoch": 0.02333637192342753,
63
+ "grad_norm": 0.9283139705657959,
64
+ "learning_rate": 2.028985507246377e-06,
65
+ "loss": 0.9770950078964233,
66
+ "step": 8
67
+ },
68
+ {
69
+ "epoch": 0.02625341841385597,
70
+ "grad_norm": 0.9589216113090515,
71
+ "learning_rate": 2.3188405797101453e-06,
72
+ "loss": 0.9442565441131592,
73
+ "step": 9
74
+ },
75
+ {
76
+ "epoch": 0.02917046490428441,
77
+ "grad_norm": 0.8866703510284424,
78
+ "learning_rate": 2.6086956521739132e-06,
79
+ "loss": 0.9354464411735535,
80
+ "step": 10
81
+ },
82
+ {
83
+ "epoch": 0.03208751139471285,
84
+ "grad_norm": 0.7191241383552551,
85
+ "learning_rate": 2.8985507246376816e-06,
86
+ "loss": 0.7659736275672913,
87
+ "step": 11
88
+ },
89
+ {
90
+ "epoch": 0.03500455788514129,
91
+ "grad_norm": 0.9110142588615417,
92
+ "learning_rate": 3.188405797101449e-06,
93
+ "loss": 0.9319326877593994,
94
+ "step": 12
95
+ },
96
+ {
97
+ "epoch": 0.03792160437556973,
98
+ "grad_norm": 0.8754057288169861,
99
+ "learning_rate": 3.4782608695652175e-06,
100
+ "loss": 0.9819356203079224,
101
+ "step": 13
102
+ },
103
+ {
104
+ "epoch": 0.04083865086599818,
105
+ "grad_norm": 0.896181046962738,
106
+ "learning_rate": 3.768115942028986e-06,
107
+ "loss": 1.026316523551941,
108
+ "step": 14
109
+ },
110
+ {
111
+ "epoch": 0.04375569735642662,
112
+ "grad_norm": 0.6104832887649536,
113
+ "learning_rate": 4.057971014492754e-06,
114
+ "loss": 0.8427562713623047,
115
+ "step": 15
116
+ },
117
+ {
118
+ "epoch": 0.04667274384685506,
119
+ "grad_norm": 0.6529208421707153,
120
+ "learning_rate": 4.347826086956522e-06,
121
+ "loss": 0.8496565222740173,
122
+ "step": 16
123
+ },
124
+ {
125
+ "epoch": 0.0495897903372835,
126
+ "grad_norm": 0.6319335699081421,
127
+ "learning_rate": 4.637681159420291e-06,
128
+ "loss": 0.9139047861099243,
129
+ "step": 17
130
+ },
131
+ {
132
+ "epoch": 0.05250683682771194,
133
+ "grad_norm": 0.7458649277687073,
134
+ "learning_rate": 4.927536231884059e-06,
135
+ "loss": 0.8867442011833191,
136
+ "step": 18
137
+ },
138
+ {
139
+ "epoch": 0.05542388331814038,
140
+ "grad_norm": 0.6179773211479187,
141
+ "learning_rate": 5.2173913043478265e-06,
142
+ "loss": 0.9579408168792725,
143
+ "step": 19
144
+ },
145
+ {
146
+ "epoch": 0.05834092980856882,
147
+ "grad_norm": 0.794481635093689,
148
+ "learning_rate": 5.507246376811595e-06,
149
+ "loss": 0.8736554980278015,
150
+ "step": 20
151
+ },
152
+ {
153
+ "epoch": 0.06125797629899726,
154
+ "grad_norm": 0.8356145620346069,
155
+ "learning_rate": 5.797101449275363e-06,
156
+ "loss": 0.9358762502670288,
157
+ "step": 21
158
+ },
159
+ {
160
+ "epoch": 0.0641750227894257,
161
+ "grad_norm": 0.5891932845115662,
162
+ "learning_rate": 6.086956521739132e-06,
163
+ "loss": 0.8972038626670837,
164
+ "step": 22
165
+ },
166
+ {
167
+ "epoch": 0.06709206927985414,
168
+ "grad_norm": 0.6931268572807312,
169
+ "learning_rate": 6.376811594202898e-06,
170
+ "loss": 0.9583507776260376,
171
+ "step": 23
172
+ },
173
+ {
174
+ "epoch": 0.07000911577028258,
175
+ "grad_norm": 0.7298229336738586,
176
+ "learning_rate": 6.666666666666667e-06,
177
+ "loss": 0.8119489550590515,
178
+ "step": 24
179
+ },
180
+ {
181
+ "epoch": 0.07292616226071102,
182
+ "grad_norm": 0.6419956684112549,
183
+ "learning_rate": 6.956521739130435e-06,
184
+ "loss": 0.9386100769042969,
185
+ "step": 25
186
+ },
187
+ {
188
+ "epoch": 0.07584320875113947,
189
+ "grad_norm": 0.7508338689804077,
190
+ "learning_rate": 7.246376811594203e-06,
191
+ "loss": 0.9272583723068237,
192
+ "step": 26
193
+ },
194
+ {
195
+ "epoch": 0.0787602552415679,
196
+ "grad_norm": 0.5848079919815063,
197
+ "learning_rate": 7.536231884057972e-06,
198
+ "loss": 0.8967856168746948,
199
+ "step": 27
200
+ },
201
+ {
202
+ "epoch": 0.08167730173199636,
203
+ "grad_norm": 0.7384837865829468,
204
+ "learning_rate": 7.82608695652174e-06,
205
+ "loss": 0.8696568012237549,
206
+ "step": 28
207
+ },
208
+ {
209
+ "epoch": 0.0845943482224248,
210
+ "grad_norm": 0.5069604516029358,
211
+ "learning_rate": 8.115942028985508e-06,
212
+ "loss": 0.9121193885803223,
213
+ "step": 29
214
+ },
215
+ {
216
+ "epoch": 0.08751139471285324,
217
+ "grad_norm": 0.833165168762207,
218
+ "learning_rate": 8.405797101449275e-06,
219
+ "loss": 0.8180589079856873,
220
+ "step": 30
221
+ },
222
+ {
223
+ "epoch": 0.09042844120328168,
224
+ "grad_norm": 0.6355920433998108,
225
+ "learning_rate": 8.695652173913044e-06,
226
+ "loss": 0.8640957474708557,
227
+ "step": 31
228
+ },
229
+ {
230
+ "epoch": 0.09334548769371012,
231
+ "grad_norm": 1.0429315567016602,
232
+ "learning_rate": 8.985507246376812e-06,
233
+ "loss": 0.9517915844917297,
234
+ "step": 32
235
+ },
236
+ {
237
+ "epoch": 0.09626253418413856,
238
+ "grad_norm": 0.5875154733657837,
239
+ "learning_rate": 9.275362318840581e-06,
240
+ "loss": 0.9443603754043579,
241
+ "step": 33
242
+ },
243
+ {
244
+ "epoch": 0.099179580674567,
245
+ "grad_norm": 1.9913769960403442,
246
+ "learning_rate": 9.565217391304349e-06,
247
+ "loss": 0.9510866403579712,
248
+ "step": 34
249
+ },
250
+ {
251
+ "epoch": 0.10209662716499544,
252
+ "grad_norm": 0.5310097932815552,
253
+ "learning_rate": 9.855072463768118e-06,
254
+ "loss": 0.8653419613838196,
255
+ "step": 35
256
+ },
257
+ {
258
+ "epoch": 0.10501367365542388,
259
+ "grad_norm": 0.624421238899231,
260
+ "learning_rate": 1.0144927536231885e-05,
261
+ "loss": 0.7941208481788635,
262
+ "step": 36
263
+ },
264
+ {
265
+ "epoch": 0.10793072014585232,
266
+ "grad_norm": 0.6314200758934021,
267
+ "learning_rate": 1.0434782608695653e-05,
268
+ "loss": 0.8931174278259277,
269
+ "step": 37
270
+ },
271
+ {
272
+ "epoch": 0.11084776663628076,
273
+ "grad_norm": 0.6272342205047607,
274
+ "learning_rate": 1.0724637681159422e-05,
275
+ "loss": 0.8978185057640076,
276
+ "step": 38
277
+ },
278
+ {
279
+ "epoch": 0.1137648131267092,
280
+ "grad_norm": 0.5711184740066528,
281
+ "learning_rate": 1.101449275362319e-05,
282
+ "loss": 0.808263897895813,
283
+ "step": 39
284
+ },
285
+ {
286
+ "epoch": 0.11668185961713765,
287
+ "grad_norm": 0.7581208944320679,
288
+ "learning_rate": 1.1304347826086957e-05,
289
+ "loss": 0.7456756830215454,
290
+ "step": 40
291
+ },
292
+ {
293
+ "epoch": 0.11959890610756609,
294
+ "grad_norm": 0.4989977180957794,
295
+ "learning_rate": 1.1594202898550726e-05,
296
+ "loss": 0.8273333311080933,
297
+ "step": 41
298
+ },
299
+ {
300
+ "epoch": 0.12251595259799453,
301
+ "grad_norm": 0.8602972626686096,
302
+ "learning_rate": 1.1884057971014494e-05,
303
+ "loss": 0.8514784574508667,
304
+ "step": 42
305
+ },
306
+ {
307
+ "epoch": 0.12543299908842298,
308
+ "grad_norm": 0.6918581128120422,
309
+ "learning_rate": 1.2173913043478263e-05,
310
+ "loss": 0.8182265162467957,
311
+ "step": 43
312
+ },
313
+ {
314
+ "epoch": 0.1283500455788514,
315
+ "grad_norm": 0.653099536895752,
316
+ "learning_rate": 1.2463768115942029e-05,
317
+ "loss": 0.8242791891098022,
318
+ "step": 44
319
+ },
320
+ {
321
+ "epoch": 0.13126709206927986,
322
+ "grad_norm": 0.7485584616661072,
323
+ "learning_rate": 1.2753623188405797e-05,
324
+ "loss": 0.8229591250419617,
325
+ "step": 45
326
+ },
327
+ {
328
+ "epoch": 0.1341841385597083,
329
+ "grad_norm": 0.6724833250045776,
330
+ "learning_rate": 1.3043478260869566e-05,
331
+ "loss": 0.8146833181381226,
332
+ "step": 46
333
+ },
334
+ {
335
+ "epoch": 0.13710118505013674,
336
+ "grad_norm": 0.857208251953125,
337
+ "learning_rate": 1.3333333333333333e-05,
338
+ "loss": 0.8154427409172058,
339
+ "step": 47
340
+ },
341
+ {
342
+ "epoch": 0.14001823154056517,
343
+ "grad_norm": 0.5559669137001038,
344
+ "learning_rate": 1.3623188405797103e-05,
345
+ "loss": 0.879005491733551,
346
+ "step": 48
347
+ },
348
+ {
349
+ "epoch": 0.14293527803099362,
350
+ "grad_norm": 0.5910897850990295,
351
+ "learning_rate": 1.391304347826087e-05,
352
+ "loss": 0.8148283362388611,
353
+ "step": 49
354
+ },
355
+ {
356
+ "epoch": 0.14585232452142205,
357
+ "grad_norm": 0.6478891372680664,
358
+ "learning_rate": 1.420289855072464e-05,
359
+ "loss": 0.8293006420135498,
360
+ "step": 50
361
+ },
362
+ {
363
+ "epoch": 0.14585232452142205,
364
+ "eval_loss": 0.7892261147499084,
365
+ "eval_runtime": 973.2157,
366
+ "eval_samples_per_second": 0.649,
367
+ "eval_steps_per_second": 0.649,
368
+ "step": 50
369
+ },
370
+ {
371
+ "epoch": 0.1487693710118505,
372
+ "grad_norm": 0.757882833480835,
373
+ "learning_rate": 1.4492753623188407e-05,
374
+ "loss": 0.8114852905273438,
375
+ "step": 51
376
+ },
377
+ {
378
+ "epoch": 0.15168641750227893,
379
+ "grad_norm": 0.8496116995811462,
380
+ "learning_rate": 1.4782608695652174e-05,
381
+ "loss": 0.7886185050010681,
382
+ "step": 52
383
+ },
384
+ {
385
+ "epoch": 0.15460346399270739,
386
+ "grad_norm": 0.6078857183456421,
387
+ "learning_rate": 1.5072463768115944e-05,
388
+ "loss": 0.7298170924186707,
389
+ "step": 53
390
+ },
391
+ {
392
+ "epoch": 0.1575205104831358,
393
+ "grad_norm": 0.5856835246086121,
394
+ "learning_rate": 1.536231884057971e-05,
395
+ "loss": 0.7407160997390747,
396
+ "step": 54
397
+ },
398
+ {
399
+ "epoch": 0.16043755697356427,
400
+ "grad_norm": 1.0533701181411743,
401
+ "learning_rate": 1.565217391304348e-05,
402
+ "loss": 0.7057831287384033,
403
+ "step": 55
404
+ },
405
+ {
406
+ "epoch": 0.16335460346399272,
407
+ "grad_norm": 0.8087610006332397,
408
+ "learning_rate": 1.5942028985507246e-05,
409
+ "loss": 0.7409019470214844,
410
+ "step": 56
411
+ },
412
+ {
413
+ "epoch": 0.16627164995442115,
414
+ "grad_norm": 0.629945695400238,
415
+ "learning_rate": 1.6231884057971015e-05,
416
+ "loss": 0.7768293023109436,
417
+ "step": 57
418
+ },
419
+ {
420
+ "epoch": 0.1691886964448496,
421
+ "grad_norm": 0.5187911987304688,
422
+ "learning_rate": 1.6521739130434785e-05,
423
+ "loss": 0.825718104839325,
424
+ "step": 58
425
+ },
426
+ {
427
+ "epoch": 0.17210574293527803,
428
+ "grad_norm": 0.5866358280181885,
429
+ "learning_rate": 1.681159420289855e-05,
430
+ "loss": 0.8575979471206665,
431
+ "step": 59
432
+ },
433
+ {
434
+ "epoch": 0.17502278942570648,
435
+ "grad_norm": 1.5098934173583984,
436
+ "learning_rate": 1.710144927536232e-05,
437
+ "loss": 0.8058848977088928,
438
+ "step": 60
439
+ },
440
+ {
441
+ "epoch": 0.1779398359161349,
442
+ "grad_norm": 0.6981958150863647,
443
+ "learning_rate": 1.739130434782609e-05,
444
+ "loss": 0.7640778422355652,
445
+ "step": 61
446
+ },
447
+ {
448
+ "epoch": 0.18085688240656336,
449
+ "grad_norm": 0.631349503993988,
450
+ "learning_rate": 1.7681159420289858e-05,
451
+ "loss": 0.7896331548690796,
452
+ "step": 62
453
+ },
454
+ {
455
+ "epoch": 0.1837739288969918,
456
+ "grad_norm": 0.6930747032165527,
457
+ "learning_rate": 1.7971014492753624e-05,
458
+ "loss": 0.6762524247169495,
459
+ "step": 63
460
+ },
461
+ {
462
+ "epoch": 0.18669097538742024,
463
+ "grad_norm": 0.599399209022522,
464
+ "learning_rate": 1.8260869565217393e-05,
465
+ "loss": 0.7285035848617554,
466
+ "step": 64
467
+ },
468
+ {
469
+ "epoch": 0.18960802187784867,
470
+ "grad_norm": 0.6194344758987427,
471
+ "learning_rate": 1.8550724637681162e-05,
472
+ "loss": 0.7682523131370544,
473
+ "step": 65
474
+ },
475
+ {
476
+ "epoch": 0.19252506836827712,
477
+ "grad_norm": 0.5691342949867249,
478
+ "learning_rate": 1.8840579710144928e-05,
479
+ "loss": 0.6791993379592896,
480
+ "step": 66
481
+ },
482
+ {
483
+ "epoch": 0.19544211485870555,
484
+ "grad_norm": 0.6257390379905701,
485
+ "learning_rate": 1.9130434782608697e-05,
486
+ "loss": 0.6744828224182129,
487
+ "step": 67
488
+ },
489
+ {
490
+ "epoch": 0.198359161349134,
491
+ "grad_norm": 0.5871018767356873,
492
+ "learning_rate": 1.9420289855072467e-05,
493
+ "loss": 0.7317330837249756,
494
+ "step": 68
495
+ },
496
+ {
497
+ "epoch": 0.20127620783956243,
498
+ "grad_norm": 1.0744612216949463,
499
+ "learning_rate": 1.9710144927536236e-05,
500
+ "loss": 0.6617178916931152,
501
+ "step": 69
502
+ },
503
+ {
504
+ "epoch": 0.2041932543299909,
505
+ "grad_norm": 0.675946831703186,
506
+ "learning_rate": 2e-05,
507
+ "loss": 0.7615712881088257,
508
+ "step": 70
509
+ },
510
+ {
511
+ "epoch": 0.2071103008204193,
512
+ "grad_norm": 0.7663411498069763,
513
+ "learning_rate": 1.9999870372100614e-05,
514
+ "loss": 0.7131291627883911,
515
+ "step": 71
516
+ },
517
+ {
518
+ "epoch": 0.21002734731084777,
519
+ "grad_norm": 0.6725395321846008,
520
+ "learning_rate": 1.9999481491763123e-05,
521
+ "loss": 0.7452989816665649,
522
+ "step": 72
523
+ },
524
+ {
525
+ "epoch": 0.21294439380127622,
526
+ "grad_norm": 0.6505664587020874,
527
+ "learning_rate": 1.9998833369069483e-05,
528
+ "loss": 0.7477136850357056,
529
+ "step": 73
530
+ },
531
+ {
532
+ "epoch": 0.21586144029170465,
533
+ "grad_norm": 0.7032860517501831,
534
+ "learning_rate": 1.9997926020822643e-05,
535
+ "loss": 0.6854275465011597,
536
+ "step": 74
537
+ },
538
+ {
539
+ "epoch": 0.2187784867821331,
540
+ "grad_norm": 0.645345151424408,
541
+ "learning_rate": 1.999675947054614e-05,
542
+ "loss": 0.7552425265312195,
543
+ "step": 75
544
+ },
545
+ {
546
+ "epoch": 0.22169553327256153,
547
+ "grad_norm": 0.6620492935180664,
548
+ "learning_rate": 1.9995333748483464e-05,
549
+ "loss": 0.7262853384017944,
550
+ "step": 76
551
+ },
552
+ {
553
+ "epoch": 0.22461257976298998,
554
+ "grad_norm": 0.6511455774307251,
555
+ "learning_rate": 1.9993648891597284e-05,
556
+ "loss": 0.7591732144355774,
557
+ "step": 77
558
+ },
559
+ {
560
+ "epoch": 0.2275296262534184,
561
+ "grad_norm": 0.6775254011154175,
562
+ "learning_rate": 1.9991704943568497e-05,
563
+ "loss": 0.7498704195022583,
564
+ "step": 78
565
+ },
566
+ {
567
+ "epoch": 0.23044667274384686,
568
+ "grad_norm": 0.8199896216392517,
569
+ "learning_rate": 1.9989501954795076e-05,
570
+ "loss": 0.7238684296607971,
571
+ "step": 79
572
+ },
573
+ {
574
+ "epoch": 0.2333637192342753,
575
+ "grad_norm": 0.8197569847106934,
576
+ "learning_rate": 1.998703998239079e-05,
577
+ "loss": 0.7028778195381165,
578
+ "step": 80
579
+ },
580
+ {
581
+ "epoch": 0.23628076572470375,
582
+ "grad_norm": 0.6602625250816345,
583
+ "learning_rate": 1.9984319090183692e-05,
584
+ "loss": 0.8842703104019165,
585
+ "step": 81
586
+ },
587
+ {
588
+ "epoch": 0.23919781221513217,
589
+ "grad_norm": 0.9587129354476929,
590
+ "learning_rate": 1.99813393487145e-05,
591
+ "loss": 0.732614278793335,
592
+ "step": 82
593
+ },
594
+ {
595
+ "epoch": 0.24211485870556063,
596
+ "grad_norm": 0.6822189092636108,
597
+ "learning_rate": 1.997810083523473e-05,
598
+ "loss": 0.7544928193092346,
599
+ "step": 83
600
+ },
601
+ {
602
+ "epoch": 0.24503190519598905,
603
+ "grad_norm": 0.8980082869529724,
604
+ "learning_rate": 1.9974603633704726e-05,
605
+ "loss": 0.6704054474830627,
606
+ "step": 84
607
+ },
608
+ {
609
+ "epoch": 0.2479489516864175,
610
+ "grad_norm": 0.7413425445556641,
611
+ "learning_rate": 1.9970847834791472e-05,
612
+ "loss": 0.693661093711853,
613
+ "step": 85
614
+ },
615
+ {
616
+ "epoch": 0.25086599817684596,
617
+ "grad_norm": 0.8314999341964722,
618
+ "learning_rate": 1.9966833535866223e-05,
619
+ "loss": 0.667654275894165,
620
+ "step": 86
621
+ },
622
+ {
623
+ "epoch": 0.25378304466727436,
624
+ "grad_norm": 0.7972444891929626,
625
+ "learning_rate": 1.9962560841002013e-05,
626
+ "loss": 0.8403134942054749,
627
+ "step": 87
628
+ },
629
+ {
630
+ "epoch": 0.2567000911577028,
631
+ "grad_norm": 0.8519951701164246,
632
+ "learning_rate": 1.995802986097093e-05,
633
+ "loss": 0.6897370219230652,
634
+ "step": 88
635
+ },
636
+ {
637
+ "epoch": 0.25961713764813127,
638
+ "grad_norm": 0.8268933892250061,
639
+ "learning_rate": 1.995324071324126e-05,
640
+ "loss": 0.6690632700920105,
641
+ "step": 89
642
+ },
643
+ {
644
+ "epoch": 0.2625341841385597,
645
+ "grad_norm": 0.7133983969688416,
646
+ "learning_rate": 1.9948193521974436e-05,
647
+ "loss": 0.6314147114753723,
648
+ "step": 90
649
+ },
650
+ {
651
+ "epoch": 0.2654512306289881,
652
+ "grad_norm": 0.889302134513855,
653
+ "learning_rate": 1.9942888418021814e-05,
654
+ "loss": 0.7389825582504272,
655
+ "step": 91
656
+ },
657
+ {
658
+ "epoch": 0.2683682771194166,
659
+ "grad_norm": 0.7022432088851929,
660
+ "learning_rate": 1.99373255389213e-05,
661
+ "loss": 0.6916261911392212,
662
+ "step": 92
663
+ },
664
+ {
665
+ "epoch": 0.27128532360984503,
666
+ "grad_norm": 0.696432888507843,
667
+ "learning_rate": 1.9931505028893748e-05,
668
+ "loss": 0.6908476948738098,
669
+ "step": 93
670
+ },
671
+ {
672
+ "epoch": 0.2742023701002735,
673
+ "grad_norm": 0.7667419910430908,
674
+ "learning_rate": 1.9925427038839267e-05,
675
+ "loss": 0.6500837206840515,
676
+ "step": 94
677
+ },
678
+ {
679
+ "epoch": 0.27711941659070194,
680
+ "grad_norm": 0.6974894404411316,
681
+ "learning_rate": 1.9919091726333265e-05,
682
+ "loss": 0.7059191465377808,
683
+ "step": 95
684
+ },
685
+ {
686
+ "epoch": 0.28003646308113034,
687
+ "grad_norm": 0.7047077417373657,
688
+ "learning_rate": 1.9912499255622397e-05,
689
+ "loss": 0.6287837624549866,
690
+ "step": 96
691
+ },
692
+ {
693
+ "epoch": 0.2829535095715588,
694
+ "grad_norm": 0.7729557156562805,
695
+ "learning_rate": 1.990564979762029e-05,
696
+ "loss": 0.6738612055778503,
697
+ "step": 97
698
+ },
699
+ {
700
+ "epoch": 0.28587055606198725,
701
+ "grad_norm": 0.7020529508590698,
702
+ "learning_rate": 1.989854352990311e-05,
703
+ "loss": 0.662042498588562,
704
+ "step": 98
705
+ },
706
+ {
707
+ "epoch": 0.2887876025524157,
708
+ "grad_norm": 0.7369800209999084,
709
+ "learning_rate": 1.9891180636704975e-05,
710
+ "loss": 0.6246830821037292,
711
+ "step": 99
712
+ },
713
+ {
714
+ "epoch": 0.2917046490428441,
715
+ "grad_norm": 0.7412623167037964,
716
+ "learning_rate": 1.9883561308913154e-05,
717
+ "loss": 0.6623879075050354,
718
+ "step": 100
719
+ },
720
+ {
721
+ "epoch": 0.2917046490428441,
722
+ "eval_loss": 0.6552971005439758,
723
+ "eval_runtime": 966.7072,
724
+ "eval_samples_per_second": 0.654,
725
+ "eval_steps_per_second": 0.654,
726
+ "step": 100
727
+ },
728
+ {
729
+ "epoch": 0.29462169553327255,
730
+ "grad_norm": 0.8428792953491211,
731
+ "learning_rate": 1.987568574406314e-05,
732
+ "loss": 0.6312171816825867,
733
+ "step": 101
734
+ },
735
+ {
736
+ "epoch": 0.297538742023701,
737
+ "grad_norm": 0.6948133707046509,
738
+ "learning_rate": 1.9867554146333517e-05,
739
+ "loss": 0.6266146898269653,
740
+ "step": 102
741
+ },
742
+ {
743
+ "epoch": 0.30045578851412946,
744
+ "grad_norm": 1.3897597789764404,
745
+ "learning_rate": 1.985916672654068e-05,
746
+ "loss": 0.6669265031814575,
747
+ "step": 103
748
+ },
749
+ {
750
+ "epoch": 0.30337283500455786,
751
+ "grad_norm": 0.8838400840759277,
752
+ "learning_rate": 1.985052370213334e-05,
753
+ "loss": 0.6601086854934692,
754
+ "step": 104
755
+ },
756
+ {
757
+ "epoch": 0.3062898814949863,
758
+ "grad_norm": 0.8471395373344421,
759
+ "learning_rate": 1.9841625297186925e-05,
760
+ "loss": 0.5984431505203247,
761
+ "step": 105
762
+ },
763
+ {
764
+ "epoch": 0.30920692798541477,
765
+ "grad_norm": 0.8940042853355408,
766
+ "learning_rate": 1.983247174239774e-05,
767
+ "loss": 0.7223822474479675,
768
+ "step": 106
769
+ },
770
+ {
771
+ "epoch": 0.3121239744758432,
772
+ "grad_norm": 0.7833696603775024,
773
+ "learning_rate": 1.9823063275076998e-05,
774
+ "loss": 0.6868705749511719,
775
+ "step": 107
776
+ },
777
+ {
778
+ "epoch": 0.3150410209662716,
779
+ "grad_norm": 0.8794649243354797,
780
+ "learning_rate": 1.9813400139144673e-05,
781
+ "loss": 0.6246675848960876,
782
+ "step": 108
783
+ },
784
+ {
785
+ "epoch": 0.3179580674567001,
786
+ "grad_norm": 0.8126057982444763,
787
+ "learning_rate": 1.9803482585123165e-05,
788
+ "loss": 0.5908697247505188,
789
+ "step": 109
790
+ },
791
+ {
792
+ "epoch": 0.32087511394712853,
793
+ "grad_norm": 0.7947676777839661,
794
+ "learning_rate": 1.979331087013082e-05,
795
+ "loss": 0.5751246809959412,
796
+ "step": 110
797
+ },
798
+ {
799
+ "epoch": 0.323792160437557,
800
+ "grad_norm": 0.713545560836792,
801
+ "learning_rate": 1.978288525787524e-05,
802
+ "loss": 0.6081106066703796,
803
+ "step": 111
804
+ },
805
+ {
806
+ "epoch": 0.32670920692798544,
807
+ "grad_norm": 1.011828064918518,
808
+ "learning_rate": 1.977220601864647e-05,
809
+ "loss": 0.7039169669151306,
810
+ "step": 112
811
+ },
812
+ {
813
+ "epoch": 0.32962625341841384,
814
+ "grad_norm": 0.730570912361145,
815
+ "learning_rate": 1.9761273429309982e-05,
816
+ "loss": 0.6140255928039551,
817
+ "step": 113
818
+ },
819
+ {
820
+ "epoch": 0.3325432999088423,
821
+ "grad_norm": 1.059688687324524,
822
+ "learning_rate": 1.9750087773299492e-05,
823
+ "loss": 0.648114025592804,
824
+ "step": 114
825
+ },
826
+ {
827
+ "epoch": 0.33546034639927075,
828
+ "grad_norm": 0.9336895942687988,
829
+ "learning_rate": 1.973864934060962e-05,
830
+ "loss": 0.622555673122406,
831
+ "step": 115
832
+ },
833
+ {
834
+ "epoch": 0.3383773928896992,
835
+ "grad_norm": 0.7195945978164673,
836
+ "learning_rate": 1.9726958427788367e-05,
837
+ "loss": 0.70485520362854,
838
+ "step": 116
839
+ },
840
+ {
841
+ "epoch": 0.3412944393801276,
842
+ "grad_norm": 0.8101872801780701,
843
+ "learning_rate": 1.971501533792942e-05,
844
+ "loss": 0.6958848834037781,
845
+ "step": 117
846
+ },
847
+ {
848
+ "epoch": 0.34421148587055606,
849
+ "grad_norm": 1.6075212955474854,
850
+ "learning_rate": 1.970282038066432e-05,
851
+ "loss": 0.6021550893783569,
852
+ "step": 118
853
+ },
854
+ {
855
+ "epoch": 0.3471285323609845,
856
+ "grad_norm": 0.7881433963775635,
857
+ "learning_rate": 1.9690373872154396e-05,
858
+ "loss": 0.6449777483940125,
859
+ "step": 119
860
+ },
861
+ {
862
+ "epoch": 0.35004557885141296,
863
+ "grad_norm": 1.014639973640442,
864
+ "learning_rate": 1.9677676135082606e-05,
865
+ "loss": 0.5939379930496216,
866
+ "step": 120
867
+ },
868
+ {
869
+ "epoch": 0.35296262534184136,
870
+ "grad_norm": 0.8198449611663818,
871
+ "learning_rate": 1.9664727498645144e-05,
872
+ "loss": 0.6210286617279053,
873
+ "step": 121
874
+ },
875
+ {
876
+ "epoch": 0.3558796718322698,
877
+ "grad_norm": 1.0194576978683472,
878
+ "learning_rate": 1.9651528298542918e-05,
879
+ "loss": 0.624247670173645,
880
+ "step": 122
881
+ },
882
+ {
883
+ "epoch": 0.35879671832269827,
884
+ "grad_norm": 0.7963470220565796,
885
+ "learning_rate": 1.9638078876972842e-05,
886
+ "loss": 0.6479315757751465,
887
+ "step": 123
888
+ },
889
+ {
890
+ "epoch": 0.3617137648131267,
891
+ "grad_norm": 0.9007541537284851,
892
+ "learning_rate": 1.9624379582618976e-05,
893
+ "loss": 0.6131505370140076,
894
+ "step": 124
895
+ },
896
+ {
897
+ "epoch": 0.3646308113035551,
898
+ "grad_norm": 0.8712120056152344,
899
+ "learning_rate": 1.9610430770643464e-05,
900
+ "loss": 0.6249448657035828,
901
+ "step": 125
902
+ },
903
+ {
904
+ "epoch": 0.3675478577939836,
905
+ "grad_norm": 1.1482540369033813,
906
+ "learning_rate": 1.9596232802677347e-05,
907
+ "loss": 0.5844688415527344,
908
+ "step": 126
909
+ },
910
+ {
911
+ "epoch": 0.37046490428441203,
912
+ "grad_norm": 0.8662379384040833,
913
+ "learning_rate": 1.9581786046811175e-05,
914
+ "loss": 0.6573485732078552,
915
+ "step": 127
916
+ },
917
+ {
918
+ "epoch": 0.3733819507748405,
919
+ "grad_norm": 0.8191388845443726,
920
+ "learning_rate": 1.9567090877585477e-05,
921
+ "loss": 0.5896862745285034,
922
+ "step": 128
923
+ },
924
+ {
925
+ "epoch": 0.37629899726526894,
926
+ "grad_norm": 1.0187078714370728,
927
+ "learning_rate": 1.955214767598103e-05,
928
+ "loss": 0.613490879535675,
929
+ "step": 129
930
+ },
931
+ {
932
+ "epoch": 0.37921604375569734,
933
+ "grad_norm": 0.8444119691848755,
934
+ "learning_rate": 1.953695682940901e-05,
935
+ "loss": 0.727687656879425,
936
+ "step": 130
937
+ },
938
+ {
939
+ "epoch": 0.3821330902461258,
940
+ "grad_norm": 0.74753737449646,
941
+ "learning_rate": 1.9521518731700913e-05,
942
+ "loss": 0.6102436780929565,
943
+ "step": 131
944
+ },
945
+ {
946
+ "epoch": 0.38505013673655425,
947
+ "grad_norm": 1.0166202783584595,
948
+ "learning_rate": 1.9505833783098378e-05,
949
+ "loss": 0.6244844198226929,
950
+ "step": 132
951
+ },
952
+ {
953
+ "epoch": 0.3879671832269827,
954
+ "grad_norm": 0.8175772428512573,
955
+ "learning_rate": 1.9489902390242793e-05,
956
+ "loss": 0.5939282178878784,
957
+ "step": 133
958
+ },
959
+ {
960
+ "epoch": 0.3908842297174111,
961
+ "grad_norm": 1.0177713632583618,
962
+ "learning_rate": 1.947372496616476e-05,
963
+ "loss": 0.6418229937553406,
964
+ "step": 134
965
+ },
966
+ {
967
+ "epoch": 0.39380127620783956,
968
+ "grad_norm": 0.8652453422546387,
969
+ "learning_rate": 1.9457301930273376e-05,
970
+ "loss": 0.5870395302772522,
971
+ "step": 135
972
+ },
973
+ {
974
+ "epoch": 0.396718322698268,
975
+ "grad_norm": 0.8378894925117493,
976
+ "learning_rate": 1.9440633708345365e-05,
977
+ "loss": 0.6480278372764587,
978
+ "step": 136
979
+ },
980
+ {
981
+ "epoch": 0.39963536918869647,
982
+ "grad_norm": 0.8303541541099548,
983
+ "learning_rate": 1.9423720732514052e-05,
984
+ "loss": 0.6191359758377075,
985
+ "step": 137
986
+ },
987
+ {
988
+ "epoch": 0.40255241567912486,
989
+ "grad_norm": 0.8576734662055969,
990
+ "learning_rate": 1.9406563441258145e-05,
991
+ "loss": 0.5696198344230652,
992
+ "step": 138
993
+ },
994
+ {
995
+ "epoch": 0.4054694621695533,
996
+ "grad_norm": 0.9558727145195007,
997
+ "learning_rate": 1.9389162279390362e-05,
998
+ "loss": 0.6177623271942139,
999
+ "step": 139
1000
+ },
1001
+ {
1002
+ "epoch": 0.4083865086599818,
1003
+ "grad_norm": 0.7046042084693909,
1004
+ "learning_rate": 1.9371517698045922e-05,
1005
+ "loss": 0.5836521983146667,
1006
+ "step": 140
1007
+ },
1008
+ {
1009
+ "epoch": 0.4113035551504102,
1010
+ "grad_norm": 1.0522717237472534,
1011
+ "learning_rate": 1.935363015467082e-05,
1012
+ "loss": 0.5728275775909424,
1013
+ "step": 141
1014
+ },
1015
+ {
1016
+ "epoch": 0.4142206016408386,
1017
+ "grad_norm": 0.9554787874221802,
1018
+ "learning_rate": 1.933550011301e-05,
1019
+ "loss": 0.632586658000946,
1020
+ "step": 142
1021
+ },
1022
+ {
1023
+ "epoch": 0.4171376481312671,
1024
+ "grad_norm": 0.8874214291572571,
1025
+ "learning_rate": 1.9317128043095293e-05,
1026
+ "loss": 0.5850118398666382,
1027
+ "step": 143
1028
+ },
1029
+ {
1030
+ "epoch": 0.42005469462169553,
1031
+ "grad_norm": 1.0708963871002197,
1032
+ "learning_rate": 1.9298514421233276e-05,
1033
+ "loss": 0.6260685324668884,
1034
+ "step": 144
1035
+ },
1036
+ {
1037
+ "epoch": 0.422971741112124,
1038
+ "grad_norm": 0.8135736584663391,
1039
+ "learning_rate": 1.9279659729992888e-05,
1040
+ "loss": 0.6031094193458557,
1041
+ "step": 145
1042
+ },
1043
+ {
1044
+ "epoch": 0.42588878760255244,
1045
+ "grad_norm": 0.7971774339675903,
1046
+ "learning_rate": 1.9260564458192926e-05,
1047
+ "loss": 0.6101322770118713,
1048
+ "step": 146
1049
+ },
1050
+ {
1051
+ "epoch": 0.42880583409298084,
1052
+ "grad_norm": 0.9374974966049194,
1053
+ "learning_rate": 1.9241229100889397e-05,
1054
+ "loss": 0.5836313366889954,
1055
+ "step": 147
1056
+ },
1057
+ {
1058
+ "epoch": 0.4317228805834093,
1059
+ "grad_norm": 0.8043425679206848,
1060
+ "learning_rate": 1.9221654159362636e-05,
1061
+ "loss": 0.6181215047836304,
1062
+ "step": 148
1063
+ },
1064
+ {
1065
+ "epoch": 0.43463992707383775,
1066
+ "grad_norm": 0.8923380374908447,
1067
+ "learning_rate": 1.920184014110436e-05,
1068
+ "loss": 0.6149677634239197,
1069
+ "step": 149
1070
+ },
1071
+ {
1072
+ "epoch": 0.4375569735642662,
1073
+ "grad_norm": 0.8908132314682007,
1074
+ "learning_rate": 1.918178755980449e-05,
1075
+ "loss": 0.5899742841720581,
1076
+ "step": 150
1077
+ },
1078
+ {
1079
+ "epoch": 0.4375569735642662,
1080
+ "eval_loss": 0.5903874635696411,
1081
+ "eval_runtime": 1186.9542,
1082
+ "eval_samples_per_second": 0.532,
1083
+ "eval_steps_per_second": 0.532,
1084
+ "step": 150
1085
+ },
1086
+ {
1087
+ "epoch": 0.4404740200546946,
1088
+ "grad_norm": 1.060531497001648,
1089
+ "learning_rate": 1.9161496935337808e-05,
1090
+ "loss": 0.5852696895599365,
1091
+ "step": 151
1092
+ },
1093
+ {
1094
+ "epoch": 0.44339106654512306,
1095
+ "grad_norm": 0.9723032712936401,
1096
+ "learning_rate": 1.914096879375053e-05,
1097
+ "loss": 0.5822056531906128,
1098
+ "step": 152
1099
+ },
1100
+ {
1101
+ "epoch": 0.4463081130355515,
1102
+ "grad_norm": 0.9519931674003601,
1103
+ "learning_rate": 1.912020366724663e-05,
1104
+ "loss": 0.6183493137359619,
1105
+ "step": 153
1106
+ },
1107
+ {
1108
+ "epoch": 0.44922515952597997,
1109
+ "grad_norm": 0.8282918334007263,
1110
+ "learning_rate": 1.9099202094174055e-05,
1111
+ "loss": 0.6229860782623291,
1112
+ "step": 154
1113
+ },
1114
+ {
1115
+ "epoch": 0.45214220601640837,
1116
+ "grad_norm": 0.9251292943954468,
1117
+ "learning_rate": 1.907796461901076e-05,
1118
+ "loss": 0.6552959680557251,
1119
+ "step": 155
1120
+ },
1121
+ {
1122
+ "epoch": 0.4550592525068368,
1123
+ "grad_norm": 1.0349540710449219,
1124
+ "learning_rate": 1.9056491792350606e-05,
1125
+ "loss": 0.6170098781585693,
1126
+ "step": 156
1127
+ },
1128
+ {
1129
+ "epoch": 0.4579762989972653,
1130
+ "grad_norm": 0.8720711469650269,
1131
+ "learning_rate": 1.9034784170889076e-05,
1132
+ "loss": 0.5870137810707092,
1133
+ "step": 157
1134
+ },
1135
+ {
1136
+ "epoch": 0.46089334548769373,
1137
+ "grad_norm": 1.0785977840423584,
1138
+ "learning_rate": 1.9012842317408843e-05,
1139
+ "loss": 0.5515124201774597,
1140
+ "step": 158
1141
+ },
1142
+ {
1143
+ "epoch": 0.4638103919781221,
1144
+ "grad_norm": 1.0634154081344604,
1145
+ "learning_rate": 1.8990666800765187e-05,
1146
+ "loss": 0.6073828339576721,
1147
+ "step": 159
1148
+ },
1149
+ {
1150
+ "epoch": 0.4667274384685506,
1151
+ "grad_norm": 0.8770879507064819,
1152
+ "learning_rate": 1.896825819587123e-05,
1153
+ "loss": 0.5960907936096191,
1154
+ "step": 160
1155
+ },
1156
+ {
1157
+ "epoch": 0.46964448495897904,
1158
+ "grad_norm": 1.1225898265838623,
1159
+ "learning_rate": 1.894561708368305e-05,
1160
+ "loss": 0.545990526676178,
1161
+ "step": 161
1162
+ },
1163
+ {
1164
+ "epoch": 0.4725615314494075,
1165
+ "grad_norm": 0.9373893141746521,
1166
+ "learning_rate": 1.8922744051184613e-05,
1167
+ "loss": 0.5566108822822571,
1168
+ "step": 162
1169
+ },
1170
+ {
1171
+ "epoch": 0.4754785779398359,
1172
+ "grad_norm": 1.5016087293624878,
1173
+ "learning_rate": 1.8899639691372545e-05,
1174
+ "loss": 0.558845043182373,
1175
+ "step": 163
1176
+ },
1177
+ {
1178
+ "epoch": 0.47839562443026434,
1179
+ "grad_norm": 0.903020977973938,
1180
+ "learning_rate": 1.8876304603240773e-05,
1181
+ "loss": 0.6824233531951904,
1182
+ "step": 164
1183
+ },
1184
+ {
1185
+ "epoch": 0.4813126709206928,
1186
+ "grad_norm": 0.8239623308181763,
1187
+ "learning_rate": 1.8852739391764993e-05,
1188
+ "loss": 0.5630610585212708,
1189
+ "step": 165
1190
+ },
1191
+ {
1192
+ "epoch": 0.48422971741112125,
1193
+ "grad_norm": 0.926069438457489,
1194
+ "learning_rate": 1.882894466788697e-05,
1195
+ "loss": 0.6211802363395691,
1196
+ "step": 166
1197
+ },
1198
+ {
1199
+ "epoch": 0.4871467639015497,
1200
+ "grad_norm": 1.0098828077316284,
1201
+ "learning_rate": 1.8804921048498722e-05,
1202
+ "loss": 0.5513257384300232,
1203
+ "step": 167
1204
+ },
1205
+ {
1206
+ "epoch": 0.4900638103919781,
1207
+ "grad_norm": 0.9228141903877258,
1208
+ "learning_rate": 1.8780669156426517e-05,
1209
+ "loss": 0.6197121739387512,
1210
+ "step": 168
1211
+ },
1212
+ {
1213
+ "epoch": 0.49298085688240656,
1214
+ "grad_norm": 1.0551754236221313,
1215
+ "learning_rate": 1.8756189620414712e-05,
1216
+ "loss": 0.5221806764602661,
1217
+ "step": 169
1218
+ },
1219
+ {
1220
+ "epoch": 0.495897903372835,
1221
+ "grad_norm": 0.9017496109008789,
1222
+ "learning_rate": 1.873148307510948e-05,
1223
+ "loss": 0.5766995549201965,
1224
+ "step": 170
1225
+ },
1226
+ {
1227
+ "epoch": 0.49881494986326347,
1228
+ "grad_norm": 0.9704970717430115,
1229
+ "learning_rate": 1.870655016104233e-05,
1230
+ "loss": 0.6514763832092285,
1231
+ "step": 171
1232
+ },
1233
+ {
1234
+ "epoch": 0.5017319963536919,
1235
+ "grad_norm": 0.9972712397575378,
1236
+ "learning_rate": 1.8681391524613518e-05,
1237
+ "loss": 0.5273895263671875,
1238
+ "step": 172
1239
+ },
1240
+ {
1241
+ "epoch": 0.5046490428441204,
1242
+ "grad_norm": 0.9473339319229126,
1243
+ "learning_rate": 1.8656007818075288e-05,
1244
+ "loss": 0.5548599362373352,
1245
+ "step": 173
1246
+ },
1247
+ {
1248
+ "epoch": 0.5075660893345487,
1249
+ "grad_norm": 1.2493574619293213,
1250
+ "learning_rate": 1.8630399699514944e-05,
1251
+ "loss": 0.5593586564064026,
1252
+ "step": 174
1253
+ },
1254
+ {
1255
+ "epoch": 0.5104831358249772,
1256
+ "grad_norm": 1.2766696214675903,
1257
+ "learning_rate": 1.860456783283781e-05,
1258
+ "loss": 0.6054630279541016,
1259
+ "step": 175
1260
+ },
1261
+ {
1262
+ "epoch": 0.5134001823154056,
1263
+ "grad_norm": 0.9555240869522095,
1264
+ "learning_rate": 1.857851288775002e-05,
1265
+ "loss": 0.508592963218689,
1266
+ "step": 176
1267
+ },
1268
+ {
1269
+ "epoch": 0.5163172288058341,
1270
+ "grad_norm": 1.260219931602478,
1271
+ "learning_rate": 1.8552235539741118e-05,
1272
+ "loss": 0.5532065629959106,
1273
+ "step": 177
1274
+ },
1275
+ {
1276
+ "epoch": 0.5192342752962625,
1277
+ "grad_norm": 1.1859954595565796,
1278
+ "learning_rate": 1.8525736470066595e-05,
1279
+ "loss": 0.5683344006538391,
1280
+ "step": 178
1281
+ },
1282
+ {
1283
+ "epoch": 0.522151321786691,
1284
+ "grad_norm": 1.3044344186782837,
1285
+ "learning_rate": 1.8499016365730203e-05,
1286
+ "loss": 0.5281959772109985,
1287
+ "step": 179
1288
+ },
1289
+ {
1290
+ "epoch": 0.5250683682771194,
1291
+ "grad_norm": 1.3049921989440918,
1292
+ "learning_rate": 1.8472075919466137e-05,
1293
+ "loss": 0.49621230363845825,
1294
+ "step": 180
1295
+ },
1296
+ {
1297
+ "epoch": 0.5279854147675479,
1298
+ "grad_norm": 1.0488537549972534,
1299
+ "learning_rate": 1.844491582972109e-05,
1300
+ "loss": 0.6194032430648804,
1301
+ "step": 181
1302
+ },
1303
+ {
1304
+ "epoch": 0.5309024612579762,
1305
+ "grad_norm": 1.5553455352783203,
1306
+ "learning_rate": 1.8417536800636138e-05,
1307
+ "loss": 0.5645846724510193,
1308
+ "step": 182
1309
+ },
1310
+ {
1311
+ "epoch": 0.5338195077484047,
1312
+ "grad_norm": 1.2673912048339844,
1313
+ "learning_rate": 1.8389939542028484e-05,
1314
+ "loss": 0.6267315745353699,
1315
+ "step": 183
1316
+ },
1317
+ {
1318
+ "epoch": 0.5367365542388332,
1319
+ "grad_norm": 1.0273847579956055,
1320
+ "learning_rate": 1.8362124769373064e-05,
1321
+ "loss": 0.5256403684616089,
1322
+ "step": 184
1323
+ },
1324
+ {
1325
+ "epoch": 0.5396536007292616,
1326
+ "grad_norm": 1.006093978881836,
1327
+ "learning_rate": 1.8334093203783986e-05,
1328
+ "loss": 0.5916382074356079,
1329
+ "step": 185
1330
+ },
1331
+ {
1332
+ "epoch": 0.5425706472196901,
1333
+ "grad_norm": 1.2740857601165771,
1334
+ "learning_rate": 1.8305845571995843e-05,
1335
+ "loss": 0.581648588180542,
1336
+ "step": 186
1337
+ },
1338
+ {
1339
+ "epoch": 0.5454876937101185,
1340
+ "grad_norm": 1.494248390197754,
1341
+ "learning_rate": 1.8277382606344872e-05,
1342
+ "loss": 0.4824523627758026,
1343
+ "step": 187
1344
+ },
1345
+ {
1346
+ "epoch": 0.548404740200547,
1347
+ "grad_norm": 1.1862496137619019,
1348
+ "learning_rate": 1.824870504474996e-05,
1349
+ "loss": 0.5531858205795288,
1350
+ "step": 188
1351
+ },
1352
+ {
1353
+ "epoch": 0.5513217866909754,
1354
+ "grad_norm": 3.503049373626709,
1355
+ "learning_rate": 1.8219813630693523e-05,
1356
+ "loss": 0.6308296918869019,
1357
+ "step": 189
1358
+ },
1359
+ {
1360
+ "epoch": 0.5542388331814039,
1361
+ "grad_norm": 1.7544710636138916,
1362
+ "learning_rate": 1.819070911320222e-05,
1363
+ "loss": 0.6146273016929626,
1364
+ "step": 190
1365
+ },
1366
+ {
1367
+ "epoch": 0.5571558796718322,
1368
+ "grad_norm": 1.3367774486541748,
1369
+ "learning_rate": 1.8161392246827546e-05,
1370
+ "loss": 0.5848966240882874,
1371
+ "step": 191
1372
+ },
1373
+ {
1374
+ "epoch": 0.5600729261622607,
1375
+ "grad_norm": 1.696418046951294,
1376
+ "learning_rate": 1.8131863791626263e-05,
1377
+ "loss": 0.6621730327606201,
1378
+ "step": 192
1379
+ },
1380
+ {
1381
+ "epoch": 0.5629899726526891,
1382
+ "grad_norm": 1.360052227973938,
1383
+ "learning_rate": 1.8102124513140694e-05,
1384
+ "loss": 0.5972204208374023,
1385
+ "step": 193
1386
+ },
1387
+ {
1388
+ "epoch": 0.5659070191431176,
1389
+ "grad_norm": 1.5376263856887817,
1390
+ "learning_rate": 1.807217518237888e-05,
1391
+ "loss": 0.4938785433769226,
1392
+ "step": 194
1393
+ },
1394
+ {
1395
+ "epoch": 0.568824065633546,
1396
+ "grad_norm": 1.2249681949615479,
1397
+ "learning_rate": 1.8042016575794585e-05,
1398
+ "loss": 0.5366095304489136,
1399
+ "step": 195
1400
+ },
1401
+ {
1402
+ "epoch": 0.5717411121239745,
1403
+ "grad_norm": 1.7868080139160156,
1404
+ "learning_rate": 1.8011649475267178e-05,
1405
+ "loss": 0.5116773843765259,
1406
+ "step": 196
1407
+ },
1408
+ {
1409
+ "epoch": 0.574658158614403,
1410
+ "grad_norm": 2.369993209838867,
1411
+ "learning_rate": 1.7981074668081345e-05,
1412
+ "loss": 0.49072742462158203,
1413
+ "step": 197
1414
+ },
1415
+ {
1416
+ "epoch": 0.5775752051048314,
1417
+ "grad_norm": 1.0168434381484985,
1418
+ "learning_rate": 1.7950292946906695e-05,
1419
+ "loss": 0.5691611170768738,
1420
+ "step": 198
1421
+ },
1422
+ {
1423
+ "epoch": 0.5804922515952597,
1424
+ "grad_norm": 1.2990851402282715,
1425
+ "learning_rate": 1.7919305109777195e-05,
1426
+ "loss": 0.5515039563179016,
1427
+ "step": 199
1428
+ },
1429
+ {
1430
+ "epoch": 0.5834092980856882,
1431
+ "grad_norm": 1.4859853982925415,
1432
+ "learning_rate": 1.7888111960070493e-05,
1433
+ "loss": 0.5017011165618896,
1434
+ "step": 200
1435
+ },
1436
+ {
1437
+ "epoch": 0.5834092980856882,
1438
+ "eval_loss": 0.5414339303970337,
1439
+ "eval_runtime": 1180.7894,
1440
+ "eval_samples_per_second": 0.535,
1441
+ "eval_steps_per_second": 0.535,
1442
+ "step": 200
1443
+ },
1444
+ {
1445
+ "epoch": 0.5863263445761167,
1446
+ "grad_norm": 1.0065829753875732,
1447
+ "learning_rate": 1.7856714306487088e-05,
1448
+ "loss": 0.5677731037139893,
1449
+ "step": 201
1450
+ },
1451
+ {
1452
+ "epoch": 0.5892433910665451,
1453
+ "grad_norm": 1.1727538108825684,
1454
+ "learning_rate": 1.7825112963029352e-05,
1455
+ "loss": 0.4525509476661682,
1456
+ "step": 202
1457
+ },
1458
+ {
1459
+ "epoch": 0.5921604375569736,
1460
+ "grad_norm": 1.3376752138137817,
1461
+ "learning_rate": 1.7793308748980437e-05,
1462
+ "loss": 0.5208959579467773,
1463
+ "step": 203
1464
+ },
1465
+ {
1466
+ "epoch": 0.595077484047402,
1467
+ "grad_norm": 0.9196159839630127,
1468
+ "learning_rate": 1.776130248888304e-05,
1469
+ "loss": 0.6033903360366821,
1470
+ "step": 204
1471
+ },
1472
+ {
1473
+ "epoch": 0.5979945305378305,
1474
+ "grad_norm": 1.0750919580459595,
1475
+ "learning_rate": 1.772909501251801e-05,
1476
+ "loss": 0.5449609160423279,
1477
+ "step": 205
1478
+ },
1479
+ {
1480
+ "epoch": 0.6009115770282589,
1481
+ "grad_norm": 1.2459467649459839,
1482
+ "learning_rate": 1.769668715488285e-05,
1483
+ "loss": 0.5685338377952576,
1484
+ "step": 206
1485
+ },
1486
+ {
1487
+ "epoch": 0.6038286235186874,
1488
+ "grad_norm": 1.1690552234649658,
1489
+ "learning_rate": 1.766407975617006e-05,
1490
+ "loss": 0.5240382552146912,
1491
+ "step": 207
1492
+ },
1493
+ {
1494
+ "epoch": 0.6067456700091157,
1495
+ "grad_norm": 1.0816599130630493,
1496
+ "learning_rate": 1.7631273661745362e-05,
1497
+ "loss": 0.6802893877029419,
1498
+ "step": 208
1499
+ },
1500
+ {
1501
+ "epoch": 0.6096627164995442,
1502
+ "grad_norm": 1.3662947416305542,
1503
+ "learning_rate": 1.7598269722125775e-05,
1504
+ "loss": 0.48193931579589844,
1505
+ "step": 209
1506
+ },
1507
+ {
1508
+ "epoch": 0.6125797629899726,
1509
+ "grad_norm": 0.9364766478538513,
1510
+ "learning_rate": 1.7565068792957576e-05,
1511
+ "loss": 0.5675849914550781,
1512
+ "step": 210
1513
+ },
1514
+ {
1515
+ "epoch": 0.6154968094804011,
1516
+ "grad_norm": 1.123828411102295,
1517
+ "learning_rate": 1.75316717349941e-05,
1518
+ "loss": 0.5474762916564941,
1519
+ "step": 211
1520
+ },
1521
+ {
1522
+ "epoch": 0.6184138559708295,
1523
+ "grad_norm": 1.1924363374710083,
1524
+ "learning_rate": 1.749807941407345e-05,
1525
+ "loss": 0.4918654263019562,
1526
+ "step": 212
1527
+ },
1528
+ {
1529
+ "epoch": 0.621330902461258,
1530
+ "grad_norm": 1.101293921470642,
1531
+ "learning_rate": 1.7464292701096014e-05,
1532
+ "loss": 0.5742691159248352,
1533
+ "step": 213
1534
+ },
1535
+ {
1536
+ "epoch": 0.6242479489516864,
1537
+ "grad_norm": 1.7374963760375977,
1538
+ "learning_rate": 1.7430312472001928e-05,
1539
+ "loss": 0.5828965902328491,
1540
+ "step": 214
1541
+ },
1542
+ {
1543
+ "epoch": 0.6271649954421149,
1544
+ "grad_norm": 1.3195666074752808,
1545
+ "learning_rate": 1.739613960774833e-05,
1546
+ "loss": 0.5265159010887146,
1547
+ "step": 215
1548
+ },
1549
+ {
1550
+ "epoch": 0.6300820419325432,
1551
+ "grad_norm": 1.254686713218689,
1552
+ "learning_rate": 1.7361774994286545e-05,
1553
+ "loss": 0.4929371476173401,
1554
+ "step": 216
1555
+ },
1556
+ {
1557
+ "epoch": 0.6329990884229717,
1558
+ "grad_norm": 1.1476380825042725,
1559
+ "learning_rate": 1.7327219522539102e-05,
1560
+ "loss": 0.5060417652130127,
1561
+ "step": 217
1562
+ },
1563
+ {
1564
+ "epoch": 0.6359161349134002,
1565
+ "grad_norm": 1.0914150476455688,
1566
+ "learning_rate": 1.7292474088376643e-05,
1567
+ "loss": 0.504043698310852,
1568
+ "step": 218
1569
+ },
1570
+ {
1571
+ "epoch": 0.6388331814038286,
1572
+ "grad_norm": 1.1339508295059204,
1573
+ "learning_rate": 1.7257539592594698e-05,
1574
+ "loss": 0.4797310531139374,
1575
+ "step": 219
1576
+ },
1577
+ {
1578
+ "epoch": 0.6417502278942571,
1579
+ "grad_norm": 1.0805399417877197,
1580
+ "learning_rate": 1.722241694089033e-05,
1581
+ "loss": 0.5878555178642273,
1582
+ "step": 220
1583
+ },
1584
+ {
1585
+ "epoch": 0.6446672743846855,
1586
+ "grad_norm": 1.8615056276321411,
1587
+ "learning_rate": 1.718710704383865e-05,
1588
+ "loss": 0.5005823969841003,
1589
+ "step": 221
1590
+ },
1591
+ {
1592
+ "epoch": 0.647584320875114,
1593
+ "grad_norm": 1.1445401906967163,
1594
+ "learning_rate": 1.7151610816869214e-05,
1595
+ "loss": 0.4949319064617157,
1596
+ "step": 222
1597
+ },
1598
+ {
1599
+ "epoch": 0.6505013673655424,
1600
+ "grad_norm": 0.9726515412330627,
1601
+ "learning_rate": 1.711592918024229e-05,
1602
+ "loss": 0.5073204040527344,
1603
+ "step": 223
1604
+ },
1605
+ {
1606
+ "epoch": 0.6534184138559709,
1607
+ "grad_norm": 1.4491140842437744,
1608
+ "learning_rate": 1.7080063059024998e-05,
1609
+ "loss": 0.47885262966156006,
1610
+ "step": 224
1611
+ },
1612
+ {
1613
+ "epoch": 0.6563354603463992,
1614
+ "grad_norm": 1.0070592164993286,
1615
+ "learning_rate": 1.7044013383067327e-05,
1616
+ "loss": 0.5775837898254395,
1617
+ "step": 225
1618
+ },
1619
+ {
1620
+ "epoch": 0.6592525068368277,
1621
+ "grad_norm": 0.966221272945404,
1622
+ "learning_rate": 1.7007781086978037e-05,
1623
+ "loss": 0.5050399899482727,
1624
+ "step": 226
1625
+ },
1626
+ {
1627
+ "epoch": 0.6621695533272561,
1628
+ "grad_norm": 0.9808815121650696,
1629
+ "learning_rate": 1.6971367110100407e-05,
1630
+ "loss": 0.5737045407295227,
1631
+ "step": 227
1632
+ },
1633
+ {
1634
+ "epoch": 0.6650865998176846,
1635
+ "grad_norm": 1.0158127546310425,
1636
+ "learning_rate": 1.6934772396487906e-05,
1637
+ "loss": 0.48077821731567383,
1638
+ "step": 228
1639
+ },
1640
+ {
1641
+ "epoch": 0.668003646308113,
1642
+ "grad_norm": 1.32015860080719,
1643
+ "learning_rate": 1.6897997894879706e-05,
1644
+ "loss": 0.5614925026893616,
1645
+ "step": 229
1646
+ },
1647
+ {
1648
+ "epoch": 0.6709206927985415,
1649
+ "grad_norm": 1.1055903434753418,
1650
+ "learning_rate": 1.686104455867608e-05,
1651
+ "loss": 0.4970760643482208,
1652
+ "step": 230
1653
+ },
1654
+ {
1655
+ "epoch": 0.67383773928897,
1656
+ "grad_norm": 1.0804500579833984,
1657
+ "learning_rate": 1.682391334591371e-05,
1658
+ "loss": 0.5540452003479004,
1659
+ "step": 231
1660
+ },
1661
+ {
1662
+ "epoch": 0.6767547857793984,
1663
+ "grad_norm": 1.1906245946884155,
1664
+ "learning_rate": 1.6786605219240807e-05,
1665
+ "loss": 0.5778501033782959,
1666
+ "step": 232
1667
+ },
1668
+ {
1669
+ "epoch": 0.6796718322698267,
1670
+ "grad_norm": 0.9758645296096802,
1671
+ "learning_rate": 1.6749121145892192e-05,
1672
+ "loss": 0.49073565006256104,
1673
+ "step": 233
1674
+ },
1675
+ {
1676
+ "epoch": 0.6825888787602552,
1677
+ "grad_norm": 1.1678364276885986,
1678
+ "learning_rate": 1.6711462097664207e-05,
1679
+ "loss": 0.4828741252422333,
1680
+ "step": 234
1681
+ },
1682
+ {
1683
+ "epoch": 0.6855059252506837,
1684
+ "grad_norm": 1.148301362991333,
1685
+ "learning_rate": 1.6673629050889507e-05,
1686
+ "loss": 0.5143818855285645,
1687
+ "step": 235
1688
+ },
1689
+ {
1690
+ "epoch": 0.6884229717411121,
1691
+ "grad_norm": 1.005898356437683,
1692
+ "learning_rate": 1.6635622986411776e-05,
1693
+ "loss": 0.5301160216331482,
1694
+ "step": 236
1695
+ },
1696
+ {
1697
+ "epoch": 0.6913400182315406,
1698
+ "grad_norm": 1.2227320671081543,
1699
+ "learning_rate": 1.659744488956027e-05,
1700
+ "loss": 0.4800386130809784,
1701
+ "step": 237
1702
+ },
1703
+ {
1704
+ "epoch": 0.694257064721969,
1705
+ "grad_norm": 0.986456573009491,
1706
+ "learning_rate": 1.6559095750124296e-05,
1707
+ "loss": 0.5098081827163696,
1708
+ "step": 238
1709
+ },
1710
+ {
1711
+ "epoch": 0.6971741112123975,
1712
+ "grad_norm": 1.1474376916885376,
1713
+ "learning_rate": 1.6520576562327518e-05,
1714
+ "loss": 0.5147273540496826,
1715
+ "step": 239
1716
+ },
1717
+ {
1718
+ "epoch": 0.7000911577028259,
1719
+ "grad_norm": 1.10917067527771,
1720
+ "learning_rate": 1.6481888324802223e-05,
1721
+ "loss": 0.5023190379142761,
1722
+ "step": 240
1723
+ },
1724
+ {
1725
+ "epoch": 0.7030082041932544,
1726
+ "grad_norm": 1.2339262962341309,
1727
+ "learning_rate": 1.644303204056341e-05,
1728
+ "loss": 0.5282092690467834,
1729
+ "step": 241
1730
+ },
1731
+ {
1732
+ "epoch": 0.7059252506836827,
1733
+ "grad_norm": 0.997941255569458,
1734
+ "learning_rate": 1.640400871698277e-05,
1735
+ "loss": 0.5635963082313538,
1736
+ "step": 242
1737
+ },
1738
+ {
1739
+ "epoch": 0.7088422971741112,
1740
+ "grad_norm": 1.0345823764801025,
1741
+ "learning_rate": 1.63648193657626e-05,
1742
+ "loss": 0.5577977895736694,
1743
+ "step": 243
1744
+ },
1745
+ {
1746
+ "epoch": 0.7117593436645396,
1747
+ "grad_norm": 1.3468303680419922,
1748
+ "learning_rate": 1.6325465002909554e-05,
1749
+ "loss": 0.4365362524986267,
1750
+ "step": 244
1751
+ },
1752
+ {
1753
+ "epoch": 0.7146763901549681,
1754
+ "grad_norm": 1.2817128896713257,
1755
+ "learning_rate": 1.628594664870831e-05,
1756
+ "loss": 0.46069926023483276,
1757
+ "step": 245
1758
+ },
1759
+ {
1760
+ "epoch": 0.7175934366453965,
1761
+ "grad_norm": 1.043311357498169,
1762
+ "learning_rate": 1.6246265327695117e-05,
1763
+ "loss": 0.5476971864700317,
1764
+ "step": 246
1765
+ },
1766
+ {
1767
+ "epoch": 0.720510483135825,
1768
+ "grad_norm": 1.0297389030456543,
1769
+ "learning_rate": 1.620642206863124e-05,
1770
+ "loss": 0.48051249980926514,
1771
+ "step": 247
1772
+ },
1773
+ {
1774
+ "epoch": 0.7234275296262535,
1775
+ "grad_norm": 1.4869836568832397,
1776
+ "learning_rate": 1.6166417904476257e-05,
1777
+ "loss": 0.5683314800262451,
1778
+ "step": 248
1779
+ },
1780
+ {
1781
+ "epoch": 0.7263445761166819,
1782
+ "grad_norm": 1.0628005266189575,
1783
+ "learning_rate": 1.6126253872361336e-05,
1784
+ "loss": 0.5277887582778931,
1785
+ "step": 249
1786
+ },
1787
+ {
1788
+ "epoch": 0.7292616226071102,
1789
+ "grad_norm": 1.2682170867919922,
1790
+ "learning_rate": 1.608593101356229e-05,
1791
+ "loss": 0.5048879384994507,
1792
+ "step": 250
1793
+ },
1794
+ {
1795
+ "epoch": 0.7292616226071102,
1796
+ "eval_loss": 0.5038471221923828,
1797
+ "eval_runtime": 1175.0375,
1798
+ "eval_samples_per_second": 0.538,
1799
+ "eval_steps_per_second": 0.538,
1800
+ "step": 250
1801
+ },
1802
+ {
1803
+ "epoch": 0.7321786690975387,
1804
+ "grad_norm": 1.7376199960708618,
1805
+ "learning_rate": 1.6045450373472626e-05,
1806
+ "loss": 0.5093721151351929,
1807
+ "step": 251
1808
+ },
1809
+ {
1810
+ "epoch": 0.7350957155879672,
1811
+ "grad_norm": 1.6047718524932861,
1812
+ "learning_rate": 1.6004813001576405e-05,
1813
+ "loss": 0.4796055555343628,
1814
+ "step": 252
1815
+ },
1816
+ {
1817
+ "epoch": 0.7380127620783956,
1818
+ "grad_norm": 1.3582886457443237,
1819
+ "learning_rate": 1.5964019951421058e-05,
1820
+ "loss": 0.4733014702796936,
1821
+ "step": 253
1822
+ },
1823
+ {
1824
+ "epoch": 0.7409298085688241,
1825
+ "grad_norm": 0.9468897581100464,
1826
+ "learning_rate": 1.5923072280590072e-05,
1827
+ "loss": 0.5312032103538513,
1828
+ "step": 254
1829
+ },
1830
+ {
1831
+ "epoch": 0.7438468550592525,
1832
+ "grad_norm": 1.3890198469161987,
1833
+ "learning_rate": 1.5881971050675547e-05,
1834
+ "loss": 0.47576645016670227,
1835
+ "step": 255
1836
+ },
1837
+ {
1838
+ "epoch": 0.746763901549681,
1839
+ "grad_norm": 1.782992959022522,
1840
+ "learning_rate": 1.584071732725071e-05,
1841
+ "loss": 0.5555092096328735,
1842
+ "step": 256
1843
+ },
1844
+ {
1845
+ "epoch": 0.7496809480401094,
1846
+ "grad_norm": 1.1790621280670166,
1847
+ "learning_rate": 1.5799312179842265e-05,
1848
+ "loss": 0.5148727893829346,
1849
+ "step": 257
1850
+ },
1851
+ {
1852
+ "epoch": 0.7525979945305379,
1853
+ "grad_norm": 1.446694254875183,
1854
+ "learning_rate": 1.5757756681902664e-05,
1855
+ "loss": 0.49939870834350586,
1856
+ "step": 258
1857
+ },
1858
+ {
1859
+ "epoch": 0.7555150410209662,
1860
+ "grad_norm": 1.1786166429519653,
1861
+ "learning_rate": 1.571605191078229e-05,
1862
+ "loss": 0.562156081199646,
1863
+ "step": 259
1864
+ },
1865
+ {
1866
+ "epoch": 0.7584320875113947,
1867
+ "grad_norm": 1.16925847530365,
1868
+ "learning_rate": 1.567419894770151e-05,
1869
+ "loss": 0.49580734968185425,
1870
+ "step": 260
1871
+ },
1872
+ {
1873
+ "epoch": 0.7613491340018231,
1874
+ "grad_norm": 1.60944664478302,
1875
+ "learning_rate": 1.5632198877722676e-05,
1876
+ "loss": 0.4821680784225464,
1877
+ "step": 261
1878
+ },
1879
+ {
1880
+ "epoch": 0.7642661804922516,
1881
+ "grad_norm": 1.3957884311676025,
1882
+ "learning_rate": 1.5590052789721946e-05,
1883
+ "loss": 0.4392276406288147,
1884
+ "step": 262
1885
+ },
1886
+ {
1887
+ "epoch": 0.76718322698268,
1888
+ "grad_norm": 1.636195421218872,
1889
+ "learning_rate": 1.5547761776361096e-05,
1890
+ "loss": 0.39603114128112793,
1891
+ "step": 263
1892
+ },
1893
+ {
1894
+ "epoch": 0.7701002734731085,
1895
+ "grad_norm": 1.496766448020935,
1896
+ "learning_rate": 1.550532693405917e-05,
1897
+ "loss": 0.4833749234676361,
1898
+ "step": 264
1899
+ },
1900
+ {
1901
+ "epoch": 0.773017319963537,
1902
+ "grad_norm": 1.3587844371795654,
1903
+ "learning_rate": 1.5462749362964058e-05,
1904
+ "loss": 0.43738317489624023,
1905
+ "step": 265
1906
+ },
1907
+ {
1908
+ "epoch": 0.7759343664539654,
1909
+ "grad_norm": 1.670704960823059,
1910
+ "learning_rate": 1.5420030166923983e-05,
1911
+ "loss": 0.4476737380027771,
1912
+ "step": 266
1913
+ },
1914
+ {
1915
+ "epoch": 0.7788514129443938,
1916
+ "grad_norm": 1.2674932479858398,
1917
+ "learning_rate": 1.537717045345888e-05,
1918
+ "loss": 0.42266708612442017,
1919
+ "step": 267
1920
+ },
1921
+ {
1922
+ "epoch": 0.7817684594348222,
1923
+ "grad_norm": 2.0639536380767822,
1924
+ "learning_rate": 1.5334171333731666e-05,
1925
+ "loss": 0.5245381593704224,
1926
+ "step": 268
1927
+ },
1928
+ {
1929
+ "epoch": 0.7846855059252507,
1930
+ "grad_norm": 1.2091766595840454,
1931
+ "learning_rate": 1.529103392251946e-05,
1932
+ "loss": 0.5166443586349487,
1933
+ "step": 269
1934
+ },
1935
+ {
1936
+ "epoch": 0.7876025524156791,
1937
+ "grad_norm": 1.1021631956100464,
1938
+ "learning_rate": 1.5247759338184653e-05,
1939
+ "loss": 0.5674265027046204,
1940
+ "step": 270
1941
+ },
1942
+ {
1943
+ "epoch": 0.7905195989061076,
1944
+ "grad_norm": 1.3143829107284546,
1945
+ "learning_rate": 1.520434870264595e-05,
1946
+ "loss": 0.40855613350868225,
1947
+ "step": 271
1948
+ },
1949
+ {
1950
+ "epoch": 0.793436645396536,
1951
+ "grad_norm": 1.1784812211990356,
1952
+ "learning_rate": 1.5160803141349244e-05,
1953
+ "loss": 0.4308925271034241,
1954
+ "step": 272
1955
+ },
1956
+ {
1957
+ "epoch": 0.7963536918869645,
1958
+ "grad_norm": 2.1635706424713135,
1959
+ "learning_rate": 1.5117123783238458e-05,
1960
+ "loss": 0.45035502314567566,
1961
+ "step": 273
1962
+ },
1963
+ {
1964
+ "epoch": 0.7992707383773929,
1965
+ "grad_norm": 1.569203495979309,
1966
+ "learning_rate": 1.5073311760726287e-05,
1967
+ "loss": 0.5095728635787964,
1968
+ "step": 274
1969
+ },
1970
+ {
1971
+ "epoch": 0.8021877848678214,
1972
+ "grad_norm": 2.532621383666992,
1973
+ "learning_rate": 1.5029368209664822e-05,
1974
+ "loss": 0.496748685836792,
1975
+ "step": 275
1976
+ },
1977
+ {
1978
+ "epoch": 0.8051048313582497,
1979
+ "grad_norm": 1.6312552690505981,
1980
+ "learning_rate": 1.4985294269316098e-05,
1981
+ "loss": 0.4972914159297943,
1982
+ "step": 276
1983
+ },
1984
+ {
1985
+ "epoch": 0.8080218778486782,
1986
+ "grad_norm": 1.3996756076812744,
1987
+ "learning_rate": 1.4941091082322579e-05,
1988
+ "loss": 0.5589750409126282,
1989
+ "step": 277
1990
+ },
1991
+ {
1992
+ "epoch": 0.8109389243391066,
1993
+ "grad_norm": 1.1288363933563232,
1994
+ "learning_rate": 1.4896759794677526e-05,
1995
+ "loss": 0.5349453687667847,
1996
+ "step": 278
1997
+ },
1998
+ {
1999
+ "epoch": 0.8138559708295351,
2000
+ "grad_norm": 1.6913920640945435,
2001
+ "learning_rate": 1.4852301555695268e-05,
2002
+ "loss": 0.46511000394821167,
2003
+ "step": 279
2004
+ },
2005
+ {
2006
+ "epoch": 0.8167730173199635,
2007
+ "grad_norm": 1.1913212537765503,
2008
+ "learning_rate": 1.4807717517981439e-05,
2009
+ "loss": 0.4715422987937927,
2010
+ "step": 280
2011
+ },
2012
+ {
2013
+ "epoch": 0.819690063810392,
2014
+ "grad_norm": 1.1179691553115845,
2015
+ "learning_rate": 1.476300883740307e-05,
2016
+ "loss": 0.53330397605896,
2017
+ "step": 281
2018
+ },
2019
+ {
2020
+ "epoch": 0.8226071103008205,
2021
+ "grad_norm": 1.7473797798156738,
2022
+ "learning_rate": 1.4718176673058624e-05,
2023
+ "loss": 0.47564437985420227,
2024
+ "step": 282
2025
+ },
2026
+ {
2027
+ "epoch": 0.8255241567912489,
2028
+ "grad_norm": 1.2653177976608276,
2029
+ "learning_rate": 1.4673222187247963e-05,
2030
+ "loss": 0.46364277601242065,
2031
+ "step": 283
2032
+ },
2033
+ {
2034
+ "epoch": 0.8284412032816773,
2035
+ "grad_norm": 1.2567330598831177,
2036
+ "learning_rate": 1.4628146545442202e-05,
2037
+ "loss": 0.4778091013431549,
2038
+ "step": 284
2039
+ },
2040
+ {
2041
+ "epoch": 0.8313582497721057,
2042
+ "grad_norm": 1.5848406553268433,
2043
+ "learning_rate": 1.4582950916253488e-05,
2044
+ "loss": 0.4480203688144684,
2045
+ "step": 285
2046
+ },
2047
+ {
2048
+ "epoch": 0.8342752962625342,
2049
+ "grad_norm": 1.3278183937072754,
2050
+ "learning_rate": 1.453763647140472e-05,
2051
+ "loss": 0.37945032119750977,
2052
+ "step": 286
2053
+ },
2054
+ {
2055
+ "epoch": 0.8371923427529626,
2056
+ "grad_norm": 1.0961651802062988,
2057
+ "learning_rate": 1.4492204385699155e-05,
2058
+ "loss": 0.5306747555732727,
2059
+ "step": 287
2060
+ },
2061
+ {
2062
+ "epoch": 0.8401093892433911,
2063
+ "grad_norm": 1.176276683807373,
2064
+ "learning_rate": 1.4446655836989961e-05,
2065
+ "loss": 0.49950045347213745,
2066
+ "step": 288
2067
+ },
2068
+ {
2069
+ "epoch": 0.8430264357338195,
2070
+ "grad_norm": 1.2228269577026367,
2071
+ "learning_rate": 1.4400992006149674e-05,
2072
+ "loss": 0.494475394487381,
2073
+ "step": 289
2074
+ },
2075
+ {
2076
+ "epoch": 0.845943482224248,
2077
+ "grad_norm": 1.1584209203720093,
2078
+ "learning_rate": 1.4355214077039592e-05,
2079
+ "loss": 0.44170859456062317,
2080
+ "step": 290
2081
+ },
2082
+ {
2083
+ "epoch": 0.8488605287146764,
2084
+ "grad_norm": 1.2041938304901123,
2085
+ "learning_rate": 1.4309323236479071e-05,
2086
+ "loss": 0.4359871745109558,
2087
+ "step": 291
2088
+ },
2089
+ {
2090
+ "epoch": 0.8517775752051049,
2091
+ "grad_norm": 1.279645562171936,
2092
+ "learning_rate": 1.4263320674214762e-05,
2093
+ "loss": 0.45031386613845825,
2094
+ "step": 292
2095
+ },
2096
+ {
2097
+ "epoch": 0.8546946216955332,
2098
+ "grad_norm": 1.3958357572555542,
2099
+ "learning_rate": 1.4217207582889769e-05,
2100
+ "loss": 0.4832204580307007,
2101
+ "step": 293
2102
+ },
2103
+ {
2104
+ "epoch": 0.8576116681859617,
2105
+ "grad_norm": 1.2788586616516113,
2106
+ "learning_rate": 1.4170985158012725e-05,
2107
+ "loss": 0.5154346227645874,
2108
+ "step": 294
2109
+ },
2110
+ {
2111
+ "epoch": 0.8605287146763901,
2112
+ "grad_norm": 1.3634892702102661,
2113
+ "learning_rate": 1.4124654597926795e-05,
2114
+ "loss": 0.46777206659317017,
2115
+ "step": 295
2116
+ },
2117
+ {
2118
+ "epoch": 0.8634457611668186,
2119
+ "grad_norm": 1.2719579935073853,
2120
+ "learning_rate": 1.4078217103778619e-05,
2121
+ "loss": 0.4247053265571594,
2122
+ "step": 296
2123
+ },
2124
+ {
2125
+ "epoch": 0.866362807657247,
2126
+ "grad_norm": 2.890467643737793,
2127
+ "learning_rate": 1.4031673879487161e-05,
2128
+ "loss": 0.38349640369415283,
2129
+ "step": 297
2130
+ },
2131
+ {
2132
+ "epoch": 0.8692798541476755,
2133
+ "grad_norm": 2.4354801177978516,
2134
+ "learning_rate": 1.3985026131712499e-05,
2135
+ "loss": 0.4134889543056488,
2136
+ "step": 298
2137
+ },
2138
+ {
2139
+ "epoch": 0.872196900638104,
2140
+ "grad_norm": 1.0138323307037354,
2141
+ "learning_rate": 1.3938275069824541e-05,
2142
+ "loss": 0.5176680684089661,
2143
+ "step": 299
2144
+ },
2145
+ {
2146
+ "epoch": 0.8751139471285324,
2147
+ "grad_norm": 1.2316186428070068,
2148
+ "learning_rate": 1.389142190587168e-05,
2149
+ "loss": 0.4818477928638458,
2150
+ "step": 300
2151
+ },
2152
+ {
2153
+ "epoch": 0.8751139471285324,
2154
+ "eval_loss": 0.4752846360206604,
2155
+ "eval_runtime": 1189.1666,
2156
+ "eval_samples_per_second": 0.531,
2157
+ "eval_steps_per_second": 0.531,
2158
+ "step": 300
2159
+ },
2160
+ {
2161
+ "epoch": 0.8780309936189608,
2162
+ "grad_norm": 1.515487551689148,
2163
+ "learning_rate": 1.384446785454936e-05,
2164
+ "loss": 0.47766175866127014,
2165
+ "step": 301
2166
+ },
2167
+ {
2168
+ "epoch": 0.8809480401093892,
2169
+ "grad_norm": 1.4357497692108154,
2170
+ "learning_rate": 1.3797414133168591e-05,
2171
+ "loss": 0.49297061562538147,
2172
+ "step": 302
2173
+ },
2174
+ {
2175
+ "epoch": 0.8838650865998177,
2176
+ "grad_norm": 1.2523037195205688,
2177
+ "learning_rate": 1.3750261961624383e-05,
2178
+ "loss": 0.4629015326499939,
2179
+ "step": 303
2180
+ },
2181
+ {
2182
+ "epoch": 0.8867821330902461,
2183
+ "grad_norm": 3.5790023803710938,
2184
+ "learning_rate": 1.3703012562364124e-05,
2185
+ "loss": 0.3773120045661926,
2186
+ "step": 304
2187
+ },
2188
+ {
2189
+ "epoch": 0.8896991795806746,
2190
+ "grad_norm": 1.9305704832077026,
2191
+ "learning_rate": 1.3655667160355892e-05,
2192
+ "loss": 0.496719628572464,
2193
+ "step": 305
2194
+ },
2195
+ {
2196
+ "epoch": 0.892616226071103,
2197
+ "grad_norm": 1.1506154537200928,
2198
+ "learning_rate": 1.3608226983056687e-05,
2199
+ "loss": 0.49487072229385376,
2200
+ "step": 306
2201
+ },
2202
+ {
2203
+ "epoch": 0.8955332725615315,
2204
+ "grad_norm": 1.8046090602874756,
2205
+ "learning_rate": 1.3560693260380614e-05,
2206
+ "loss": 0.4910697937011719,
2207
+ "step": 307
2208
+ },
2209
+ {
2210
+ "epoch": 0.8984503190519599,
2211
+ "grad_norm": 2.0088653564453125,
2212
+ "learning_rate": 1.3513067224667e-05,
2213
+ "loss": 0.508246660232544,
2214
+ "step": 308
2215
+ },
2216
+ {
2217
+ "epoch": 0.9013673655423883,
2218
+ "grad_norm": 1.2966033220291138,
2219
+ "learning_rate": 1.3465350110648437e-05,
2220
+ "loss": 0.5125166177749634,
2221
+ "step": 309
2222
+ },
2223
+ {
2224
+ "epoch": 0.9042844120328167,
2225
+ "grad_norm": 1.9976309537887573,
2226
+ "learning_rate": 1.3417543155418775e-05,
2227
+ "loss": 0.43942537903785706,
2228
+ "step": 310
2229
+ },
2230
+ {
2231
+ "epoch": 0.9072014585232452,
2232
+ "grad_norm": 1.2663682699203491,
2233
+ "learning_rate": 1.336964759840105e-05,
2234
+ "loss": 0.4839101731777191,
2235
+ "step": 311
2236
+ },
2237
+ {
2238
+ "epoch": 0.9101185050136736,
2239
+ "grad_norm": 1.1223328113555908,
2240
+ "learning_rate": 1.3321664681315354e-05,
2241
+ "loss": 0.48008066415786743,
2242
+ "step": 312
2243
+ },
2244
+ {
2245
+ "epoch": 0.9130355515041021,
2246
+ "grad_norm": 1.5786972045898438,
2247
+ "learning_rate": 1.3273595648146634e-05,
2248
+ "loss": 0.47250309586524963,
2249
+ "step": 313
2250
+ },
2251
+ {
2252
+ "epoch": 0.9159525979945305,
2253
+ "grad_norm": 1.2150241136550903,
2254
+ "learning_rate": 1.322544174511245e-05,
2255
+ "loss": 0.5149738788604736,
2256
+ "step": 314
2257
+ },
2258
+ {
2259
+ "epoch": 0.918869644484959,
2260
+ "grad_norm": 1.3676542043685913,
2261
+ "learning_rate": 1.3177204220630662e-05,
2262
+ "loss": 0.4430195093154907,
2263
+ "step": 315
2264
+ },
2265
+ {
2266
+ "epoch": 0.9217866909753875,
2267
+ "grad_norm": 1.0703285932540894,
2268
+ "learning_rate": 1.3128884325287064e-05,
2269
+ "loss": 0.4798983037471771,
2270
+ "step": 316
2271
+ },
2272
+ {
2273
+ "epoch": 0.9247037374658159,
2274
+ "grad_norm": 1.3131535053253174,
2275
+ "learning_rate": 1.308048331180296e-05,
2276
+ "loss": 0.4241073727607727,
2277
+ "step": 317
2278
+ },
2279
+ {
2280
+ "epoch": 0.9276207839562443,
2281
+ "grad_norm": 1.4485348463058472,
2282
+ "learning_rate": 1.3032002435002698e-05,
2283
+ "loss": 0.527199923992157,
2284
+ "step": 318
2285
+ },
2286
+ {
2287
+ "epoch": 0.9305378304466727,
2288
+ "grad_norm": 1.370936393737793,
2289
+ "learning_rate": 1.2983442951781114e-05,
2290
+ "loss": 0.47125962376594543,
2291
+ "step": 319
2292
+ },
2293
+ {
2294
+ "epoch": 0.9334548769371012,
2295
+ "grad_norm": 1.2369643449783325,
2296
+ "learning_rate": 1.2934806121070973e-05,
2297
+ "loss": 0.4814244210720062,
2298
+ "step": 320
2299
+ },
2300
+ {
2301
+ "epoch": 0.9363719234275296,
2302
+ "grad_norm": 1.2632933855056763,
2303
+ "learning_rate": 1.2886093203810314e-05,
2304
+ "loss": 0.4915548264980316,
2305
+ "step": 321
2306
+ },
2307
+ {
2308
+ "epoch": 0.9392889699179581,
2309
+ "grad_norm": 1.054569959640503,
2310
+ "learning_rate": 1.2837305462909764e-05,
2311
+ "loss": 0.5325602293014526,
2312
+ "step": 322
2313
+ },
2314
+ {
2315
+ "epoch": 0.9422060164083865,
2316
+ "grad_norm": 1.15959632396698,
2317
+ "learning_rate": 1.27884441632198e-05,
2318
+ "loss": 0.43607404828071594,
2319
+ "step": 323
2320
+ },
2321
+ {
2322
+ "epoch": 0.945123062898815,
2323
+ "grad_norm": 1.1667979955673218,
2324
+ "learning_rate": 1.2739510571497945e-05,
2325
+ "loss": 0.4631507992744446,
2326
+ "step": 324
2327
+ },
2328
+ {
2329
+ "epoch": 0.9480401093892434,
2330
+ "grad_norm": 1.6009081602096558,
2331
+ "learning_rate": 1.2690505956375944e-05,
2332
+ "loss": 0.4935731887817383,
2333
+ "step": 325
2334
+ },
2335
+ {
2336
+ "epoch": 0.9509571558796718,
2337
+ "grad_norm": 1.1193996667861938,
2338
+ "learning_rate": 1.2641431588326858e-05,
2339
+ "loss": 0.45883435010910034,
2340
+ "step": 326
2341
+ },
2342
+ {
2343
+ "epoch": 0.9538742023701002,
2344
+ "grad_norm": 1.5365067720413208,
2345
+ "learning_rate": 1.2592288739632138e-05,
2346
+ "loss": 0.5206276178359985,
2347
+ "step": 327
2348
+ },
2349
+ {
2350
+ "epoch": 0.9567912488605287,
2351
+ "grad_norm": 1.0714622735977173,
2352
+ "learning_rate": 1.2543078684348632e-05,
2353
+ "loss": 0.5242853760719299,
2354
+ "step": 328
2355
+ },
2356
+ {
2357
+ "epoch": 0.9597082953509571,
2358
+ "grad_norm": 1.3009248971939087,
2359
+ "learning_rate": 1.2493802698275557e-05,
2360
+ "loss": 0.4794357717037201,
2361
+ "step": 329
2362
+ },
2363
+ {
2364
+ "epoch": 0.9626253418413856,
2365
+ "grad_norm": 1.495771050453186,
2366
+ "learning_rate": 1.244446205892143e-05,
2367
+ "loss": 0.5849282145500183,
2368
+ "step": 330
2369
+ },
2370
+ {
2371
+ "epoch": 0.965542388331814,
2372
+ "grad_norm": 1.2046003341674805,
2373
+ "learning_rate": 1.2395058045470935e-05,
2374
+ "loss": 0.47758305072784424,
2375
+ "step": 331
2376
+ },
2377
+ {
2378
+ "epoch": 0.9684594348222425,
2379
+ "grad_norm": 1.1362569332122803,
2380
+ "learning_rate": 1.2345591938751772e-05,
2381
+ "loss": 0.4490663409233093,
2382
+ "step": 332
2383
+ },
2384
+ {
2385
+ "epoch": 0.971376481312671,
2386
+ "grad_norm": 1.2658129930496216,
2387
+ "learning_rate": 1.2296065021201438e-05,
2388
+ "loss": 0.4035309851169586,
2389
+ "step": 333
2390
+ },
2391
+ {
2392
+ "epoch": 0.9742935278030994,
2393
+ "grad_norm": 4.370306015014648,
2394
+ "learning_rate": 1.2246478576833993e-05,
2395
+ "loss": 0.495273619890213,
2396
+ "step": 334
2397
+ },
2398
+ {
2399
+ "epoch": 0.9772105742935278,
2400
+ "grad_norm": 1.3863654136657715,
2401
+ "learning_rate": 1.219683389120676e-05,
2402
+ "loss": 0.46410733461380005,
2403
+ "step": 335
2404
+ },
2405
+ {
2406
+ "epoch": 0.9801276207839562,
2407
+ "grad_norm": 1.4544321298599243,
2408
+ "learning_rate": 1.2147132251387004e-05,
2409
+ "loss": 0.4301709830760956,
2410
+ "step": 336
2411
+ },
2412
+ {
2413
+ "epoch": 0.9830446672743847,
2414
+ "grad_norm": 1.0852457284927368,
2415
+ "learning_rate": 1.2097374945918554e-05,
2416
+ "loss": 0.48892468214035034,
2417
+ "step": 337
2418
+ },
2419
+ {
2420
+ "epoch": 0.9859617137648131,
2421
+ "grad_norm": 1.5062257051467896,
2422
+ "learning_rate": 1.2047563264788412e-05,
2423
+ "loss": 0.4667983055114746,
2424
+ "step": 338
2425
+ },
2426
+ {
2427
+ "epoch": 0.9888787602552416,
2428
+ "grad_norm": 1.2472951412200928,
2429
+ "learning_rate": 1.199769849939329e-05,
2430
+ "loss": 0.4827345013618469,
2431
+ "step": 339
2432
+ },
2433
+ {
2434
+ "epoch": 0.99179580674567,
2435
+ "grad_norm": 1.2589871883392334,
2436
+ "learning_rate": 1.1947781942506151e-05,
2437
+ "loss": 0.405245304107666,
2438
+ "step": 340
2439
+ },
2440
+ {
2441
+ "epoch": 0.9947128532360985,
2442
+ "grad_norm": 1.25636625289917,
2443
+ "learning_rate": 1.1897814888242679e-05,
2444
+ "loss": 0.37956133484840393,
2445
+ "step": 341
2446
+ },
2447
+ {
2448
+ "epoch": 0.9976298997265269,
2449
+ "grad_norm": 2.7064895629882812,
2450
+ "learning_rate": 1.1847798632027726e-05,
2451
+ "loss": 0.489456444978714,
2452
+ "step": 342
2453
+ },
2454
+ {
2455
+ "epoch": 1.0,
2456
+ "grad_norm": 1.6156240701675415,
2457
+ "learning_rate": 1.1797734470561744e-05,
2458
+ "loss": 0.46473199129104614,
2459
+ "step": 343
2460
+ },
2461
+ {
2462
+ "epoch": 1.0029170464904285,
2463
+ "grad_norm": 1.3046343326568604,
2464
+ "learning_rate": 1.1747623701787143e-05,
2465
+ "loss": 0.3504878282546997,
2466
+ "step": 344
2467
+ },
2468
+ {
2469
+ "epoch": 1.005834092980857,
2470
+ "grad_norm": 1.414828896522522,
2471
+ "learning_rate": 1.1697467624854666e-05,
2472
+ "loss": 0.4719260334968567,
2473
+ "step": 345
2474
+ },
2475
+ {
2476
+ "epoch": 1.0087511394712854,
2477
+ "grad_norm": 1.1873356103897095,
2478
+ "learning_rate": 1.164726754008969e-05,
2479
+ "loss": 0.45313555002212524,
2480
+ "step": 346
2481
+ },
2482
+ {
2483
+ "epoch": 1.0116681859617138,
2484
+ "grad_norm": 1.1382380723953247,
2485
+ "learning_rate": 1.1597024748958526e-05,
2486
+ "loss": 0.4365478456020355,
2487
+ "step": 347
2488
+ },
2489
+ {
2490
+ "epoch": 1.0145852324521423,
2491
+ "grad_norm": 1.8141961097717285,
2492
+ "learning_rate": 1.1546740554034661e-05,
2493
+ "loss": 0.3694503605365753,
2494
+ "step": 348
2495
+ },
2496
+ {
2497
+ "epoch": 1.0175022789425707,
2498
+ "grad_norm": 1.333388328552246,
2499
+ "learning_rate": 1.1496416258965015e-05,
2500
+ "loss": 0.4755721688270569,
2501
+ "step": 349
2502
+ },
2503
+ {
2504
+ "epoch": 1.0204193254329992,
2505
+ "grad_norm": 1.3464443683624268,
2506
+ "learning_rate": 1.1446053168436117e-05,
2507
+ "loss": 0.4227846562862396,
2508
+ "step": 350
2509
+ },
2510
+ {
2511
+ "epoch": 1.0204193254329992,
2512
+ "eval_loss": 0.44924086332321167,
2513
+ "eval_runtime": 1214.6648,
2514
+ "eval_samples_per_second": 0.52,
2515
+ "eval_steps_per_second": 0.52,
2516
+ "step": 350
2517
+ },
2518
+ {
2519
+ "epoch": 1.0233363719234276,
2520
+ "grad_norm": 1.2682689428329468,
2521
+ "learning_rate": 1.1395652588140292e-05,
2522
+ "loss": 0.44300130009651184,
2523
+ "step": 351
2524
+ },
2525
+ {
2526
+ "epoch": 1.0262534184138559,
2527
+ "grad_norm": 1.7737696170806885,
2528
+ "learning_rate": 1.1345215824741814e-05,
2529
+ "loss": 0.5106258988380432,
2530
+ "step": 352
2531
+ },
2532
+ {
2533
+ "epoch": 1.0291704649042843,
2534
+ "grad_norm": 1.2601238489151,
2535
+ "learning_rate": 1.1294744185843014e-05,
2536
+ "loss": 0.45930635929107666,
2537
+ "step": 353
2538
+ },
2539
+ {
2540
+ "epoch": 1.0320875113947128,
2541
+ "grad_norm": 1.2162678241729736,
2542
+ "learning_rate": 1.1244238979950406e-05,
2543
+ "loss": 0.44163084030151367,
2544
+ "step": 354
2545
+ },
2546
+ {
2547
+ "epoch": 1.0350045578851412,
2548
+ "grad_norm": 1.0905817747116089,
2549
+ "learning_rate": 1.1193701516440733e-05,
2550
+ "loss": 0.510662317276001,
2551
+ "step": 355
2552
+ },
2553
+ {
2554
+ "epoch": 1.0379216043755697,
2555
+ "grad_norm": 0.9624952673912048,
2556
+ "learning_rate": 1.1143133105527048e-05,
2557
+ "loss": 0.5297917127609253,
2558
+ "step": 356
2559
+ },
2560
+ {
2561
+ "epoch": 1.0408386508659981,
2562
+ "grad_norm": 1.2757681608200073,
2563
+ "learning_rate": 1.1092535058224725e-05,
2564
+ "loss": 0.4332093596458435,
2565
+ "step": 357
2566
+ },
2567
+ {
2568
+ "epoch": 1.0437556973564266,
2569
+ "grad_norm": 1.6885719299316406,
2570
+ "learning_rate": 1.104190868631748e-05,
2571
+ "loss": 0.4337635040283203,
2572
+ "step": 358
2573
+ },
2574
+ {
2575
+ "epoch": 1.046672743846855,
2576
+ "grad_norm": 1.175484538078308,
2577
+ "learning_rate": 1.099125530232336e-05,
2578
+ "loss": 0.45411020517349243,
2579
+ "step": 359
2580
+ },
2581
+ {
2582
+ "epoch": 1.0495897903372835,
2583
+ "grad_norm": 1.0964939594268799,
2584
+ "learning_rate": 1.0940576219460723e-05,
2585
+ "loss": 0.5333439707756042,
2586
+ "step": 360
2587
+ },
2588
+ {
2589
+ "epoch": 1.052506836827712,
2590
+ "grad_norm": 1.5493136644363403,
2591
+ "learning_rate": 1.0889872751614176e-05,
2592
+ "loss": 0.4400906264781952,
2593
+ "step": 361
2594
+ },
2595
+ {
2596
+ "epoch": 1.0554238833181404,
2597
+ "grad_norm": 1.2491416931152344,
2598
+ "learning_rate": 1.0839146213300526e-05,
2599
+ "loss": 0.31049978733062744,
2600
+ "step": 362
2601
+ },
2602
+ {
2603
+ "epoch": 1.0583409298085689,
2604
+ "grad_norm": 1.7213693857192993,
2605
+ "learning_rate": 1.0788397919634694e-05,
2606
+ "loss": 0.389009028673172,
2607
+ "step": 363
2608
+ },
2609
+ {
2610
+ "epoch": 1.0612579762989973,
2611
+ "grad_norm": 1.5405336618423462,
2612
+ "learning_rate": 1.0737629186295621e-05,
2613
+ "loss": 0.4068562984466553,
2614
+ "step": 364
2615
+ },
2616
+ {
2617
+ "epoch": 1.0641750227894258,
2618
+ "grad_norm": 1.225455641746521,
2619
+ "learning_rate": 1.0686841329492159e-05,
2620
+ "loss": 0.47358617186546326,
2621
+ "step": 365
2622
+ },
2623
+ {
2624
+ "epoch": 1.0670920692798542,
2625
+ "grad_norm": 1.3436250686645508,
2626
+ "learning_rate": 1.0636035665928945e-05,
2627
+ "loss": 0.47050854563713074,
2628
+ "step": 366
2629
+ },
2630
+ {
2631
+ "epoch": 1.0700091157702827,
2632
+ "grad_norm": 1.4952112436294556,
2633
+ "learning_rate": 1.058521351277227e-05,
2634
+ "loss": 0.43496906757354736,
2635
+ "step": 367
2636
+ },
2637
+ {
2638
+ "epoch": 1.072926162260711,
2639
+ "grad_norm": 1.549112319946289,
2640
+ "learning_rate": 1.0534376187615924e-05,
2641
+ "loss": 0.45711052417755127,
2642
+ "step": 368
2643
+ },
2644
+ {
2645
+ "epoch": 1.0758432087511394,
2646
+ "grad_norm": 1.3851526975631714,
2647
+ "learning_rate": 1.048352500844704e-05,
2648
+ "loss": 0.45045915246009827,
2649
+ "step": 369
2650
+ },
2651
+ {
2652
+ "epoch": 1.0787602552415678,
2653
+ "grad_norm": 1.6302049160003662,
2654
+ "learning_rate": 1.0432661293611927e-05,
2655
+ "loss": 0.3736046254634857,
2656
+ "step": 370
2657
+ },
2658
+ {
2659
+ "epoch": 1.0816773017319963,
2660
+ "grad_norm": 1.3365869522094727,
2661
+ "learning_rate": 1.0381786361781885e-05,
2662
+ "loss": 0.42242100834846497,
2663
+ "step": 371
2664
+ },
2665
+ {
2666
+ "epoch": 1.0845943482224247,
2667
+ "grad_norm": 1.4369138479232788,
2668
+ "learning_rate": 1.0330901531919026e-05,
2669
+ "loss": 0.44570961594581604,
2670
+ "step": 372
2671
+ },
2672
+ {
2673
+ "epoch": 1.0875113947128532,
2674
+ "grad_norm": 1.3528283834457397,
2675
+ "learning_rate": 1.0280008123242069e-05,
2676
+ "loss": 0.43440738320350647,
2677
+ "step": 373
2678
+ },
2679
+ {
2680
+ "epoch": 1.0904284412032816,
2681
+ "grad_norm": 1.469660997390747,
2682
+ "learning_rate": 1.0229107455192147e-05,
2683
+ "loss": 0.3960394263267517,
2684
+ "step": 374
2685
+ },
2686
+ {
2687
+ "epoch": 1.09334548769371,
2688
+ "grad_norm": 1.4542185068130493,
2689
+ "learning_rate": 1.0178200847398595e-05,
2690
+ "loss": 0.47834208607673645,
2691
+ "step": 375
2692
+ },
2693
+ {
2694
+ "epoch": 1.0962625341841385,
2695
+ "grad_norm": 1.6470292806625366,
2696
+ "learning_rate": 1.0127289619644737e-05,
2697
+ "loss": 0.42791086435317993,
2698
+ "step": 376
2699
+ },
2700
+ {
2701
+ "epoch": 1.099179580674567,
2702
+ "grad_norm": 1.1934021711349487,
2703
+ "learning_rate": 1.0076375091833681e-05,
2704
+ "loss": 0.4401305019855499,
2705
+ "step": 377
2706
+ },
2707
+ {
2708
+ "epoch": 1.1020966271649955,
2709
+ "grad_norm": 0.9786668419837952,
2710
+ "learning_rate": 1.0025458583954078e-05,
2711
+ "loss": 0.4816555678844452,
2712
+ "step": 378
2713
+ },
2714
+ {
2715
+ "epoch": 1.105013673655424,
2716
+ "grad_norm": 1.1348779201507568,
2717
+ "learning_rate": 9.974541416045924e-06,
2718
+ "loss": 0.41516968607902527,
2719
+ "step": 379
2720
+ },
2721
+ {
2722
+ "epoch": 1.1079307201458524,
2723
+ "grad_norm": 1.0188615322113037,
2724
+ "learning_rate": 9.923624908166322e-06,
2725
+ "loss": 0.48087278008461,
2726
+ "step": 380
2727
+ },
2728
+ {
2729
+ "epoch": 1.1108477666362808,
2730
+ "grad_norm": 1.0821740627288818,
2731
+ "learning_rate": 9.872710380355263e-06,
2732
+ "loss": 0.41974008083343506,
2733
+ "step": 381
2734
+ },
2735
+ {
2736
+ "epoch": 1.1137648131267093,
2737
+ "grad_norm": 1.250951886177063,
2738
+ "learning_rate": 9.82179915260141e-06,
2739
+ "loss": 0.42703643441200256,
2740
+ "step": 382
2741
+ },
2742
+ {
2743
+ "epoch": 1.1166818596171377,
2744
+ "grad_norm": 1.4528254270553589,
2745
+ "learning_rate": 9.770892544807856e-06,
2746
+ "loss": 0.43801453709602356,
2747
+ "step": 383
2748
+ },
2749
+ {
2750
+ "epoch": 1.1195989061075662,
2751
+ "grad_norm": 1.813859462738037,
2752
+ "learning_rate": 9.719991876757934e-06,
2753
+ "loss": 0.4344240725040436,
2754
+ "step": 384
2755
+ },
2756
+ {
2757
+ "epoch": 1.1225159525979946,
2758
+ "grad_norm": 1.6681253910064697,
2759
+ "learning_rate": 9.669098468080976e-06,
2760
+ "loss": 0.4356998801231384,
2761
+ "step": 385
2762
+ },
2763
+ {
2764
+ "epoch": 1.125432999088423,
2765
+ "grad_norm": 1.3447953462600708,
2766
+ "learning_rate": 9.618213638218117e-06,
2767
+ "loss": 0.43189188838005066,
2768
+ "step": 386
2769
+ },
2770
+ {
2771
+ "epoch": 1.1283500455788513,
2772
+ "grad_norm": 1.9577926397323608,
2773
+ "learning_rate": 9.567338706388074e-06,
2774
+ "loss": 0.34984707832336426,
2775
+ "step": 387
2776
+ },
2777
+ {
2778
+ "epoch": 1.1312670920692798,
2779
+ "grad_norm": 1.5225576162338257,
2780
+ "learning_rate": 9.516474991552965e-06,
2781
+ "loss": 0.4243963062763214,
2782
+ "step": 388
2783
+ },
2784
+ {
2785
+ "epoch": 1.1341841385597082,
2786
+ "grad_norm": 1.7416809797286987,
2787
+ "learning_rate": 9.46562381238408e-06,
2788
+ "loss": 0.3414606750011444,
2789
+ "step": 389
2790
+ },
2791
+ {
2792
+ "epoch": 1.1371011850501367,
2793
+ "grad_norm": 1.8358951807022095,
2794
+ "learning_rate": 9.414786487227732e-06,
2795
+ "loss": 0.387447327375412,
2796
+ "step": 390
2797
+ },
2798
+ {
2799
+ "epoch": 1.1400182315405651,
2800
+ "grad_norm": 1.9706153869628906,
2801
+ "learning_rate": 9.363964334071057e-06,
2802
+ "loss": 0.4599088728427887,
2803
+ "step": 391
2804
+ },
2805
+ {
2806
+ "epoch": 1.1429352780309936,
2807
+ "grad_norm": 1.0604286193847656,
2808
+ "learning_rate": 9.313158670507843e-06,
2809
+ "loss": 0.4633581042289734,
2810
+ "step": 392
2811
+ },
2812
+ {
2813
+ "epoch": 1.145852324521422,
2814
+ "grad_norm": 1.4851202964782715,
2815
+ "learning_rate": 9.262370813704379e-06,
2816
+ "loss": 0.3872259557247162,
2817
+ "step": 393
2818
+ },
2819
+ {
2820
+ "epoch": 1.1487693710118505,
2821
+ "grad_norm": 1.7839159965515137,
2822
+ "learning_rate": 9.21160208036531e-06,
2823
+ "loss": 0.5215944647789001,
2824
+ "step": 394
2825
+ },
2826
+ {
2827
+ "epoch": 1.151686417502279,
2828
+ "grad_norm": 1.3054656982421875,
2829
+ "learning_rate": 9.160853786699475e-06,
2830
+ "loss": 0.4030425548553467,
2831
+ "step": 395
2832
+ },
2833
+ {
2834
+ "epoch": 1.1546034639927074,
2835
+ "grad_norm": 3.8467981815338135,
2836
+ "learning_rate": 9.110127248385827e-06,
2837
+ "loss": 0.4032524824142456,
2838
+ "step": 396
2839
+ },
2840
+ {
2841
+ "epoch": 1.1575205104831359,
2842
+ "grad_norm": 1.8513801097869873,
2843
+ "learning_rate": 9.05942378053928e-06,
2844
+ "loss": 0.46577155590057373,
2845
+ "step": 397
2846
+ },
2847
+ {
2848
+ "epoch": 1.1604375569735643,
2849
+ "grad_norm": 1.312689185142517,
2850
+ "learning_rate": 9.008744697676642e-06,
2851
+ "loss": 0.39114487171173096,
2852
+ "step": 398
2853
+ },
2854
+ {
2855
+ "epoch": 1.1633546034639928,
2856
+ "grad_norm": 1.1996328830718994,
2857
+ "learning_rate": 8.958091313682521e-06,
2858
+ "loss": 0.481199711561203,
2859
+ "step": 399
2860
+ },
2861
+ {
2862
+ "epoch": 1.1662716499544212,
2863
+ "grad_norm": 5.172409534454346,
2864
+ "learning_rate": 8.90746494177528e-06,
2865
+ "loss": 0.3803558945655823,
2866
+ "step": 400
2867
+ },
2868
+ {
2869
+ "epoch": 1.1662716499544212,
2870
+ "eval_loss": 0.4318464398384094,
2871
+ "eval_runtime": 1206.0306,
2872
+ "eval_samples_per_second": 0.524,
2873
+ "eval_steps_per_second": 0.524,
2874
+ "step": 400
2875
+ }
2876
+ ],
2877
+ "logging_steps": 1,
2878
+ "max_steps": 686,
2879
+ "num_input_tokens_seen": 0,
2880
+ "num_train_epochs": 2,
2881
+ "save_steps": 100,
2882
+ "stateful_callbacks": {
2883
+ "TrainerControl": {
2884
+ "args": {
2885
+ "should_epoch_stop": false,
2886
+ "should_evaluate": false,
2887
+ "should_log": false,
2888
+ "should_save": true,
2889
+ "should_training_stop": false
2890
+ },
2891
+ "attributes": {}
2892
+ }
2893
+ },
2894
+ "total_flos": 3.6837285277665853e+18,
2895
+ "train_batch_size": 1,
2896
+ "trial_name": null,
2897
+ "trial_params": null
2898
+ }
cpt_devstral_24B/checkpoints/checkpoint-400/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f62526ec2433add7ac031c48b1f6ff360f1ade77275765112cbf7cf361d64ca5
3
+ size 5201
cpt_devstral_24B/checkpoints/checkpoint-500/README.md ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: /workspace/Models/Devstral-Small-2-24B-Instruct-2512
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:/workspace/Models/Devstral-Small-2-24B-Instruct-2512
7
+ - lora
8
+ - transformers
9
+ ---
10
+
11
+ # Model Card for Model ID
12
+
13
+ <!-- Provide a quick summary of what the model is/does. -->
14
+
15
+
16
+
17
+ ## Model Details
18
+
19
+ ### Model Description
20
+
21
+ <!-- Provide a longer summary of what this model is. -->
22
+
23
+
24
+
25
+ - **Developed by:** [More Information Needed]
26
+ - **Funded by [optional]:** [More Information Needed]
27
+ - **Shared by [optional]:** [More Information Needed]
28
+ - **Model type:** [More Information Needed]
29
+ - **Language(s) (NLP):** [More Information Needed]
30
+ - **License:** [More Information Needed]
31
+ - **Finetuned from model [optional]:** [More Information Needed]
32
+
33
+ ### Model Sources [optional]
34
+
35
+ <!-- Provide the basic links for the model. -->
36
+
37
+ - **Repository:** [More Information Needed]
38
+ - **Paper [optional]:** [More Information Needed]
39
+ - **Demo [optional]:** [More Information Needed]
40
+
41
+ ## Uses
42
+
43
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
44
+
45
+ ### Direct Use
46
+
47
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
48
+
49
+ [More Information Needed]
50
+
51
+ ### Downstream Use [optional]
52
+
53
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
54
+
55
+ [More Information Needed]
56
+
57
+ ### Out-of-Scope Use
58
+
59
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
60
+
61
+ [More Information Needed]
62
+
63
+ ## Bias, Risks, and Limitations
64
+
65
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
66
+
67
+ [More Information Needed]
68
+
69
+ ### Recommendations
70
+
71
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
72
+
73
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
74
+
75
+ ## How to Get Started with the Model
76
+
77
+ Use the code below to get started with the model.
78
+
79
+ [More Information Needed]
80
+
81
+ ## Training Details
82
+
83
+ ### Training Data
84
+
85
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
86
+
87
+ [More Information Needed]
88
+
89
+ ### Training Procedure
90
+
91
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
92
+
93
+ #### Preprocessing [optional]
94
+
95
+ [More Information Needed]
96
+
97
+
98
+ #### Training Hyperparameters
99
+
100
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
101
+
102
+ #### Speeds, Sizes, Times [optional]
103
+
104
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
105
+
106
+ [More Information Needed]
107
+
108
+ ## Evaluation
109
+
110
+ <!-- This section describes the evaluation protocols and provides the results. -->
111
+
112
+ ### Testing Data, Factors & Metrics
113
+
114
+ #### Testing Data
115
+
116
+ <!-- This should link to a Dataset Card if possible. -->
117
+
118
+ [More Information Needed]
119
+
120
+ #### Factors
121
+
122
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
123
+
124
+ [More Information Needed]
125
+
126
+ #### Metrics
127
+
128
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
129
+
130
+ [More Information Needed]
131
+
132
+ ### Results
133
+
134
+ [More Information Needed]
135
+
136
+ #### Summary
137
+
138
+
139
+
140
+ ## Model Examination [optional]
141
+
142
+ <!-- Relevant interpretability work for the model goes here -->
143
+
144
+ [More Information Needed]
145
+
146
+ ## Environmental Impact
147
+
148
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
149
+
150
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
151
+
152
+ - **Hardware Type:** [More Information Needed]
153
+ - **Hours used:** [More Information Needed]
154
+ - **Cloud Provider:** [More Information Needed]
155
+ - **Compute Region:** [More Information Needed]
156
+ - **Carbon Emitted:** [More Information Needed]
157
+
158
+ ## Technical Specifications [optional]
159
+
160
+ ### Model Architecture and Objective
161
+
162
+ [More Information Needed]
163
+
164
+ ### Compute Infrastructure
165
+
166
+ [More Information Needed]
167
+
168
+ #### Hardware
169
+
170
+ [More Information Needed]
171
+
172
+ #### Software
173
+
174
+ [More Information Needed]
175
+
176
+ ## Citation [optional]
177
+
178
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
179
+
180
+ **BibTeX:**
181
+
182
+ [More Information Needed]
183
+
184
+ **APA:**
185
+
186
+ [More Information Needed]
187
+
188
+ ## Glossary [optional]
189
+
190
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
191
+
192
+ [More Information Needed]
193
+
194
+ ## More Information [optional]
195
+
196
+ [More Information Needed]
197
+
198
+ ## Model Card Authors [optional]
199
+
200
+ [More Information Needed]
201
+
202
+ ## Model Card Contact
203
+
204
+ [More Information Needed]
205
+ ### Framework versions
206
+
207
+ - PEFT 0.18.0
cpt_devstral_24B/checkpoints/checkpoint-500/adapter_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "/workspace/Models/Devstral-Small-2-24B-Instruct-2512",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 128,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.0",
27
+ "qalora_group_size": 16,
28
+ "r": 64,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "k_proj",
33
+ "o_proj",
34
+ "q_proj",
35
+ "v_proj"
36
+ ],
37
+ "target_parameters": null,
38
+ "task_type": "CAUSAL_LM",
39
+ "trainable_token_indices": null,
40
+ "use_dora": false,
41
+ "use_qalora": false,
42
+ "use_rslora": false
43
+ }
cpt_devstral_24B/checkpoints/checkpoint-500/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40d8d694c7fc76d670a2720c03cab875ad7ac3e20bfdbf2d2360ed074f2d69a8
3
+ size 364983848
cpt_devstral_24B/checkpoints/checkpoint-500/chat_template.jinja ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {#- Default system message if no system prompt is passed. #}
2
+ {%- set default_system_message = '' %}
3
+
4
+ {#- Begin of sequence token. #}
5
+ {{- bos_token }}
6
+
7
+ {#- Handle system prompt if it exists. #}
8
+ {#- System prompt supports text content or text chunks. #}
9
+ {%- if messages[0]['role'] == 'system' %}
10
+ {{- '[SYSTEM_PROMPT]' -}}
11
+ {%- if messages[0]['content'] is string %}
12
+ {{- messages[0]['content'] -}}
13
+ {%- else %}
14
+ {%- for block in messages[0]['content'] %}
15
+ {%- if block['type'] == 'text' %}
16
+ {{- block['text'] }}
17
+ {%- else %}
18
+ {{- raise_exception('Only text chunks are supported in system message contents.') }}
19
+ {%- endif %}
20
+ {%- endfor %}
21
+ {%- endif %}
22
+ {{- '[/SYSTEM_PROMPT]' -}}
23
+ {%- set loop_messages = messages[1:] %}
24
+ {%- else %}
25
+ {%- set loop_messages = messages %}
26
+ {%- if default_system_message != '' %}
27
+ {{- '[SYSTEM_PROMPT]' + default_system_message + '[/SYSTEM_PROMPT]' }}
28
+ {%- endif %}
29
+ {%- endif %}
30
+
31
+
32
+ {#- Tools definition #}
33
+ {%- set tools_definition = '' %}
34
+ {%- set has_tools = false %}
35
+ {%- if tools is defined and tools is not none and tools|length > 0 %}
36
+ {%- set has_tools = true %}
37
+ {%- set tools_definition = '[AVAILABLE_TOOLS]' + (tools| tojson) + '[/AVAILABLE_TOOLS]' %}
38
+ {{- tools_definition }}
39
+ {%- endif %}
40
+
41
+ {#- Checks for alternating user/assistant messages. #}
42
+ {%- set ns = namespace(index=0) %}
43
+ {%- for message in loop_messages %}
44
+ {%- if message.role == 'user' or (message.role == 'assistant' and (message.tool_calls is not defined or message.tool_calls is none or message.tool_calls | length == 0)) %}
45
+ {%- if (message['role'] == 'user') != (ns.index % 2 == 0) %}
46
+ {{- raise_exception('After the optional system message, conversation roles must alternate user and assistant roles except for tool calls and results.') }}
47
+ {%- endif %}
48
+ {%- set ns.index = ns.index + 1 %}
49
+ {%- endif %}
50
+ {%- endfor %}
51
+
52
+ {#- Handle conversation messages. #}
53
+ {%- for message in loop_messages %}
54
+
55
+ {#- User messages support text content or text and image chunks. #}
56
+ {%- if message['role'] == 'user' %}
57
+ {%- if message['content'] is string %}
58
+ {{- '[INST]' + message['content'] + '[/INST]' }}
59
+ {%- elif message['content'] | length > 0 %}
60
+ {{- '[INST]' }}
61
+ {%- if message['content'] | length == 2 %}
62
+ {%- set blocks = message['content'] | sort(attribute='type') %}
63
+ {%- else %}
64
+ {%- set blocks = message['content'] %}
65
+ {%- endif %}
66
+ {%- for block in blocks %}
67
+ {%- if block['type'] == 'text' %}
68
+ {{- block['text'] }}
69
+ {%- elif block['type'] in ['image', 'image_url'] %}
70
+ {{- '[IMG]' }}
71
+ {%- else %}
72
+ {{- raise_exception('Only text, image and image_url chunks are supported in user message content.') }}
73
+ {%- endif %}
74
+ {%- endfor %}
75
+ {{- '[/INST]' }}
76
+ {%- else %}
77
+ {{- raise_exception('User message must have a string or a list of chunks in content') }}
78
+ {%- endif %}
79
+
80
+ {#- Assistant messages support text content or text chunks, plus optional tool calls. #}
81
+ {%- elif message['role'] == 'assistant' %}
82
+ {%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}
83
+ {{- raise_exception('Assistant message must have a string or a list of chunks in content or a list of tool calls.') }}
84
+ {%- endif %}
85
+
86
+ {%- if message['content'] is string %}
87
+ {{- message['content'] }}
88
+ {%- elif message['content'] | length > 0 %}
89
+ {%- for block in message['content'] %}
90
+ {%- if block['type'] == 'text' %}
91
+ {{- block['text'] }}
92
+ {%- else %}
93
+ {{- raise_exception('Only text chunks are supported in assistant message contents.') }}
94
+ {%- endif %}
95
+ {%- endfor %}
96
+ {%- endif %}
97
+
98
+ {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 %}
99
+ {%- for tool in message['tool_calls'] %}
100
+ {%- set arguments = tool['function']['arguments'] %}
101
+ {%- if arguments is not string %}
102
+ {%- set arguments = arguments|tojson|safe %}
103
+ {%- elif arguments == '' %}
104
+ {%- set arguments = '{}' %}
105
+ {%- endif %}
106
+ {{- '[TOOL_CALLS]' + tool['function']['name'] + '[ARGS]' + arguments }}
107
+ {%- endfor %}
108
+ {%- endif %}
109
+
110
+ {#- End of sequence token for each assistant message. #}
111
+ {{- eos_token }}
112
+
113
+ {#- Tool messages only support text content. #}
114
+ {%- elif message['role'] == 'tool' %}
115
+ {{- '[TOOL_RESULTS]' + message['content']|string + '[/TOOL_RESULTS]' }}
116
+
117
+ {#- Raise exception for unsupported roles. #}
118
+ {%- else %}
119
+ {{- raise_exception('Only user, assistant and tool roles are supported, got ' + message['role'] + '.') }}
120
+ {%- endif %}
121
+ {%- endfor %}
cpt_devstral_24B/checkpoints/checkpoint-500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:335aa629db8c0490b49c6dbd8a2212e0cb7b06d115ab83a96e1f18d23652855c
3
+ size 160131559
cpt_devstral_24B/checkpoints/checkpoint-500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08185a483370e678d79b51807a988cfe41d318265f91634025d2a0d25c5a3615
3
+ size 14645
cpt_devstral_24B/checkpoints/checkpoint-500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fee4726958e54cc743c86acde73ad8c729bc35b56d33cb9894bdb5eba634ffd9
3
+ size 1465
cpt_devstral_24B/checkpoints/checkpoint-500/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286acad9b0e27fce778ac429763536accf618ccb6ed72963b6f94685e531c5c7
3
+ size 17077402
cpt_devstral_24B/checkpoints/checkpoint-500/tokenizer_config.json ADDED
@@ -0,0 +1,1013 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "extra_special_tokens": [
6
+ "<unk>",
7
+ "<s>",
8
+ "</s>",
9
+ "[INST]",
10
+ "[/INST]",
11
+ "[AVAILABLE_TOOLS]",
12
+ "[/AVAILABLE_TOOLS]",
13
+ "[TOOL_RESULTS]",
14
+ "[/TOOL_RESULTS]",
15
+ "[TOOL_CALLS]",
16
+ "[IMG]",
17
+ "<pad>",
18
+ "[IMG_BREAK]",
19
+ "[IMG_END]",
20
+ "[PREFIX]",
21
+ "[MIDDLE]",
22
+ "[SUFFIX]",
23
+ "[SYSTEM_PROMPT]",
24
+ "[/SYSTEM_PROMPT]",
25
+ "[TOOL_CONTENT]",
26
+ "<SPECIAL_20>",
27
+ "<SPECIAL_21>",
28
+ "<SPECIAL_22>",
29
+ "<SPECIAL_23>",
30
+ "[AUDIO]",
31
+ "[BEGIN_AUDIO]",
32
+ "<SPECIAL_26>",
33
+ "<SPECIAL_27>",
34
+ "<SPECIAL_28>",
35
+ "<SPECIAL_29>",
36
+ "<SPECIAL_30>",
37
+ "<SPECIAL_31>",
38
+ "[ARGS]",
39
+ "[CALL_ID]",
40
+ "[THINK]",
41
+ "[/THINK]",
42
+ "<SPECIAL_36>",
43
+ "<SPECIAL_37>",
44
+ "<SPECIAL_38>",
45
+ "<SPECIAL_39>",
46
+ "<SPECIAL_40>",
47
+ "<SPECIAL_41>",
48
+ "<SPECIAL_42>",
49
+ "<SPECIAL_43>",
50
+ "<SPECIAL_44>",
51
+ "<SPECIAL_45>",
52
+ "<SPECIAL_46>",
53
+ "<SPECIAL_47>",
54
+ "<SPECIAL_48>",
55
+ "<SPECIAL_49>",
56
+ "<SPECIAL_50>",
57
+ "<SPECIAL_51>",
58
+ "<SPECIAL_52>",
59
+ "<SPECIAL_53>",
60
+ "<SPECIAL_54>",
61
+ "<SPECIAL_55>",
62
+ "<SPECIAL_56>",
63
+ "<SPECIAL_57>",
64
+ "<SPECIAL_58>",
65
+ "<SPECIAL_59>",
66
+ "<SPECIAL_60>",
67
+ "<SPECIAL_61>",
68
+ "<SPECIAL_62>",
69
+ "<SPECIAL_63>",
70
+ "<SPECIAL_64>",
71
+ "<SPECIAL_65>",
72
+ "<SPECIAL_66>",
73
+ "<SPECIAL_67>",
74
+ "<SPECIAL_68>",
75
+ "<SPECIAL_69>",
76
+ "<SPECIAL_70>",
77
+ "<SPECIAL_71>",
78
+ "<SPECIAL_72>",
79
+ "<SPECIAL_73>",
80
+ "<SPECIAL_74>",
81
+ "<SPECIAL_75>",
82
+ "<SPECIAL_76>",
83
+ "<SPECIAL_77>",
84
+ "<SPECIAL_78>",
85
+ "<SPECIAL_79>",
86
+ "<SPECIAL_80>",
87
+ "<SPECIAL_81>",
88
+ "<SPECIAL_82>",
89
+ "<SPECIAL_83>",
90
+ "<SPECIAL_84>",
91
+ "<SPECIAL_85>",
92
+ "<SPECIAL_86>",
93
+ "<SPECIAL_87>",
94
+ "<SPECIAL_88>",
95
+ "<SPECIAL_89>",
96
+ "<SPECIAL_90>",
97
+ "<SPECIAL_91>",
98
+ "<SPECIAL_92>",
99
+ "<SPECIAL_93>",
100
+ "<SPECIAL_94>",
101
+ "<SPECIAL_95>",
102
+ "<SPECIAL_96>",
103
+ "<SPECIAL_97>",
104
+ "<SPECIAL_98>",
105
+ "<SPECIAL_99>",
106
+ "<SPECIAL_100>",
107
+ "<SPECIAL_101>",
108
+ "<SPECIAL_102>",
109
+ "<SPECIAL_103>",
110
+ "<SPECIAL_104>",
111
+ "<SPECIAL_105>",
112
+ "<SPECIAL_106>",
113
+ "<SPECIAL_107>",
114
+ "<SPECIAL_108>",
115
+ "<SPECIAL_109>",
116
+ "<SPECIAL_110>",
117
+ "<SPECIAL_111>",
118
+ "<SPECIAL_112>",
119
+ "<SPECIAL_113>",
120
+ "<SPECIAL_114>",
121
+ "<SPECIAL_115>",
122
+ "<SPECIAL_116>",
123
+ "<SPECIAL_117>",
124
+ "<SPECIAL_118>",
125
+ "<SPECIAL_119>",
126
+ "<SPECIAL_120>",
127
+ "<SPECIAL_121>",
128
+ "<SPECIAL_122>",
129
+ "<SPECIAL_123>",
130
+ "<SPECIAL_124>",
131
+ "<SPECIAL_125>",
132
+ "<SPECIAL_126>",
133
+ "<SPECIAL_127>",
134
+ "<SPECIAL_128>",
135
+ "<SPECIAL_129>",
136
+ "<SPECIAL_130>",
137
+ "<SPECIAL_131>",
138
+ "<SPECIAL_132>",
139
+ "<SPECIAL_133>",
140
+ "<SPECIAL_134>",
141
+ "<SPECIAL_135>",
142
+ "<SPECIAL_136>",
143
+ "<SPECIAL_137>",
144
+ "<SPECIAL_138>",
145
+ "<SPECIAL_139>",
146
+ "<SPECIAL_140>",
147
+ "<SPECIAL_141>",
148
+ "<SPECIAL_142>",
149
+ "<SPECIAL_143>",
150
+ "<SPECIAL_144>",
151
+ "<SPECIAL_145>",
152
+ "<SPECIAL_146>",
153
+ "<SPECIAL_147>",
154
+ "<SPECIAL_148>",
155
+ "<SPECIAL_149>",
156
+ "<SPECIAL_150>",
157
+ "<SPECIAL_151>",
158
+ "<SPECIAL_152>",
159
+ "<SPECIAL_153>",
160
+ "<SPECIAL_154>",
161
+ "<SPECIAL_155>",
162
+ "<SPECIAL_156>",
163
+ "<SPECIAL_157>",
164
+ "<SPECIAL_158>",
165
+ "<SPECIAL_159>",
166
+ "<SPECIAL_160>",
167
+ "<SPECIAL_161>",
168
+ "<SPECIAL_162>",
169
+ "<SPECIAL_163>",
170
+ "<SPECIAL_164>",
171
+ "<SPECIAL_165>",
172
+ "<SPECIAL_166>",
173
+ "<SPECIAL_167>",
174
+ "<SPECIAL_168>",
175
+ "<SPECIAL_169>",
176
+ "<SPECIAL_170>",
177
+ "<SPECIAL_171>",
178
+ "<SPECIAL_172>",
179
+ "<SPECIAL_173>",
180
+ "<SPECIAL_174>",
181
+ "<SPECIAL_175>",
182
+ "<SPECIAL_176>",
183
+ "<SPECIAL_177>",
184
+ "<SPECIAL_178>",
185
+ "<SPECIAL_179>",
186
+ "<SPECIAL_180>",
187
+ "<SPECIAL_181>",
188
+ "<SPECIAL_182>",
189
+ "<SPECIAL_183>",
190
+ "<SPECIAL_184>",
191
+ "<SPECIAL_185>",
192
+ "<SPECIAL_186>",
193
+ "<SPECIAL_187>",
194
+ "<SPECIAL_188>",
195
+ "<SPECIAL_189>",
196
+ "<SPECIAL_190>",
197
+ "<SPECIAL_191>",
198
+ "<SPECIAL_192>",
199
+ "<SPECIAL_193>",
200
+ "<SPECIAL_194>",
201
+ "<SPECIAL_195>",
202
+ "<SPECIAL_196>",
203
+ "<SPECIAL_197>",
204
+ "<SPECIAL_198>",
205
+ "<SPECIAL_199>",
206
+ "<SPECIAL_200>",
207
+ "<SPECIAL_201>",
208
+ "<SPECIAL_202>",
209
+ "<SPECIAL_203>",
210
+ "<SPECIAL_204>",
211
+ "<SPECIAL_205>",
212
+ "<SPECIAL_206>",
213
+ "<SPECIAL_207>",
214
+ "<SPECIAL_208>",
215
+ "<SPECIAL_209>",
216
+ "<SPECIAL_210>",
217
+ "<SPECIAL_211>",
218
+ "<SPECIAL_212>",
219
+ "<SPECIAL_213>",
220
+ "<SPECIAL_214>",
221
+ "<SPECIAL_215>",
222
+ "<SPECIAL_216>",
223
+ "<SPECIAL_217>",
224
+ "<SPECIAL_218>",
225
+ "<SPECIAL_219>",
226
+ "<SPECIAL_220>",
227
+ "<SPECIAL_221>",
228
+ "<SPECIAL_222>",
229
+ "<SPECIAL_223>",
230
+ "<SPECIAL_224>",
231
+ "<SPECIAL_225>",
232
+ "<SPECIAL_226>",
233
+ "<SPECIAL_227>",
234
+ "<SPECIAL_228>",
235
+ "<SPECIAL_229>",
236
+ "<SPECIAL_230>",
237
+ "<SPECIAL_231>",
238
+ "<SPECIAL_232>",
239
+ "<SPECIAL_233>",
240
+ "<SPECIAL_234>",
241
+ "<SPECIAL_235>",
242
+ "<SPECIAL_236>",
243
+ "<SPECIAL_237>",
244
+ "<SPECIAL_238>",
245
+ "<SPECIAL_239>",
246
+ "<SPECIAL_240>",
247
+ "<SPECIAL_241>",
248
+ "<SPECIAL_242>",
249
+ "<SPECIAL_243>",
250
+ "<SPECIAL_244>",
251
+ "<SPECIAL_245>",
252
+ "<SPECIAL_246>",
253
+ "<SPECIAL_247>",
254
+ "<SPECIAL_248>",
255
+ "<SPECIAL_249>",
256
+ "<SPECIAL_250>",
257
+ "<SPECIAL_251>",
258
+ "<SPECIAL_252>",
259
+ "<SPECIAL_253>",
260
+ "<SPECIAL_254>",
261
+ "<SPECIAL_255>",
262
+ "<SPECIAL_256>",
263
+ "<SPECIAL_257>",
264
+ "<SPECIAL_258>",
265
+ "<SPECIAL_259>",
266
+ "<SPECIAL_260>",
267
+ "<SPECIAL_261>",
268
+ "<SPECIAL_262>",
269
+ "<SPECIAL_263>",
270
+ "<SPECIAL_264>",
271
+ "<SPECIAL_265>",
272
+ "<SPECIAL_266>",
273
+ "<SPECIAL_267>",
274
+ "<SPECIAL_268>",
275
+ "<SPECIAL_269>",
276
+ "<SPECIAL_270>",
277
+ "<SPECIAL_271>",
278
+ "<SPECIAL_272>",
279
+ "<SPECIAL_273>",
280
+ "<SPECIAL_274>",
281
+ "<SPECIAL_275>",
282
+ "<SPECIAL_276>",
283
+ "<SPECIAL_277>",
284
+ "<SPECIAL_278>",
285
+ "<SPECIAL_279>",
286
+ "<SPECIAL_280>",
287
+ "<SPECIAL_281>",
288
+ "<SPECIAL_282>",
289
+ "<SPECIAL_283>",
290
+ "<SPECIAL_284>",
291
+ "<SPECIAL_285>",
292
+ "<SPECIAL_286>",
293
+ "<SPECIAL_287>",
294
+ "<SPECIAL_288>",
295
+ "<SPECIAL_289>",
296
+ "<SPECIAL_290>",
297
+ "<SPECIAL_291>",
298
+ "<SPECIAL_292>",
299
+ "<SPECIAL_293>",
300
+ "<SPECIAL_294>",
301
+ "<SPECIAL_295>",
302
+ "<SPECIAL_296>",
303
+ "<SPECIAL_297>",
304
+ "<SPECIAL_298>",
305
+ "<SPECIAL_299>",
306
+ "<SPECIAL_300>",
307
+ "<SPECIAL_301>",
308
+ "<SPECIAL_302>",
309
+ "<SPECIAL_303>",
310
+ "<SPECIAL_304>",
311
+ "<SPECIAL_305>",
312
+ "<SPECIAL_306>",
313
+ "<SPECIAL_307>",
314
+ "<SPECIAL_308>",
315
+ "<SPECIAL_309>",
316
+ "<SPECIAL_310>",
317
+ "<SPECIAL_311>",
318
+ "<SPECIAL_312>",
319
+ "<SPECIAL_313>",
320
+ "<SPECIAL_314>",
321
+ "<SPECIAL_315>",
322
+ "<SPECIAL_316>",
323
+ "<SPECIAL_317>",
324
+ "<SPECIAL_318>",
325
+ "<SPECIAL_319>",
326
+ "<SPECIAL_320>",
327
+ "<SPECIAL_321>",
328
+ "<SPECIAL_322>",
329
+ "<SPECIAL_323>",
330
+ "<SPECIAL_324>",
331
+ "<SPECIAL_325>",
332
+ "<SPECIAL_326>",
333
+ "<SPECIAL_327>",
334
+ "<SPECIAL_328>",
335
+ "<SPECIAL_329>",
336
+ "<SPECIAL_330>",
337
+ "<SPECIAL_331>",
338
+ "<SPECIAL_332>",
339
+ "<SPECIAL_333>",
340
+ "<SPECIAL_334>",
341
+ "<SPECIAL_335>",
342
+ "<SPECIAL_336>",
343
+ "<SPECIAL_337>",
344
+ "<SPECIAL_338>",
345
+ "<SPECIAL_339>",
346
+ "<SPECIAL_340>",
347
+ "<SPECIAL_341>",
348
+ "<SPECIAL_342>",
349
+ "<SPECIAL_343>",
350
+ "<SPECIAL_344>",
351
+ "<SPECIAL_345>",
352
+ "<SPECIAL_346>",
353
+ "<SPECIAL_347>",
354
+ "<SPECIAL_348>",
355
+ "<SPECIAL_349>",
356
+ "<SPECIAL_350>",
357
+ "<SPECIAL_351>",
358
+ "<SPECIAL_352>",
359
+ "<SPECIAL_353>",
360
+ "<SPECIAL_354>",
361
+ "<SPECIAL_355>",
362
+ "<SPECIAL_356>",
363
+ "<SPECIAL_357>",
364
+ "<SPECIAL_358>",
365
+ "<SPECIAL_359>",
366
+ "<SPECIAL_360>",
367
+ "<SPECIAL_361>",
368
+ "<SPECIAL_362>",
369
+ "<SPECIAL_363>",
370
+ "<SPECIAL_364>",
371
+ "<SPECIAL_365>",
372
+ "<SPECIAL_366>",
373
+ "<SPECIAL_367>",
374
+ "<SPECIAL_368>",
375
+ "<SPECIAL_369>",
376
+ "<SPECIAL_370>",
377
+ "<SPECIAL_371>",
378
+ "<SPECIAL_372>",
379
+ "<SPECIAL_373>",
380
+ "<SPECIAL_374>",
381
+ "<SPECIAL_375>",
382
+ "<SPECIAL_376>",
383
+ "<SPECIAL_377>",
384
+ "<SPECIAL_378>",
385
+ "<SPECIAL_379>",
386
+ "<SPECIAL_380>",
387
+ "<SPECIAL_381>",
388
+ "<SPECIAL_382>",
389
+ "<SPECIAL_383>",
390
+ "<SPECIAL_384>",
391
+ "<SPECIAL_385>",
392
+ "<SPECIAL_386>",
393
+ "<SPECIAL_387>",
394
+ "<SPECIAL_388>",
395
+ "<SPECIAL_389>",
396
+ "<SPECIAL_390>",
397
+ "<SPECIAL_391>",
398
+ "<SPECIAL_392>",
399
+ "<SPECIAL_393>",
400
+ "<SPECIAL_394>",
401
+ "<SPECIAL_395>",
402
+ "<SPECIAL_396>",
403
+ "<SPECIAL_397>",
404
+ "<SPECIAL_398>",
405
+ "<SPECIAL_399>",
406
+ "<SPECIAL_400>",
407
+ "<SPECIAL_401>",
408
+ "<SPECIAL_402>",
409
+ "<SPECIAL_403>",
410
+ "<SPECIAL_404>",
411
+ "<SPECIAL_405>",
412
+ "<SPECIAL_406>",
413
+ "<SPECIAL_407>",
414
+ "<SPECIAL_408>",
415
+ "<SPECIAL_409>",
416
+ "<SPECIAL_410>",
417
+ "<SPECIAL_411>",
418
+ "<SPECIAL_412>",
419
+ "<SPECIAL_413>",
420
+ "<SPECIAL_414>",
421
+ "<SPECIAL_415>",
422
+ "<SPECIAL_416>",
423
+ "<SPECIAL_417>",
424
+ "<SPECIAL_418>",
425
+ "<SPECIAL_419>",
426
+ "<SPECIAL_420>",
427
+ "<SPECIAL_421>",
428
+ "<SPECIAL_422>",
429
+ "<SPECIAL_423>",
430
+ "<SPECIAL_424>",
431
+ "<SPECIAL_425>",
432
+ "<SPECIAL_426>",
433
+ "<SPECIAL_427>",
434
+ "<SPECIAL_428>",
435
+ "<SPECIAL_429>",
436
+ "<SPECIAL_430>",
437
+ "<SPECIAL_431>",
438
+ "<SPECIAL_432>",
439
+ "<SPECIAL_433>",
440
+ "<SPECIAL_434>",
441
+ "<SPECIAL_435>",
442
+ "<SPECIAL_436>",
443
+ "<SPECIAL_437>",
444
+ "<SPECIAL_438>",
445
+ "<SPECIAL_439>",
446
+ "<SPECIAL_440>",
447
+ "<SPECIAL_441>",
448
+ "<SPECIAL_442>",
449
+ "<SPECIAL_443>",
450
+ "<SPECIAL_444>",
451
+ "<SPECIAL_445>",
452
+ "<SPECIAL_446>",
453
+ "<SPECIAL_447>",
454
+ "<SPECIAL_448>",
455
+ "<SPECIAL_449>",
456
+ "<SPECIAL_450>",
457
+ "<SPECIAL_451>",
458
+ "<SPECIAL_452>",
459
+ "<SPECIAL_453>",
460
+ "<SPECIAL_454>",
461
+ "<SPECIAL_455>",
462
+ "<SPECIAL_456>",
463
+ "<SPECIAL_457>",
464
+ "<SPECIAL_458>",
465
+ "<SPECIAL_459>",
466
+ "<SPECIAL_460>",
467
+ "<SPECIAL_461>",
468
+ "<SPECIAL_462>",
469
+ "<SPECIAL_463>",
470
+ "<SPECIAL_464>",
471
+ "<SPECIAL_465>",
472
+ "<SPECIAL_466>",
473
+ "<SPECIAL_467>",
474
+ "<SPECIAL_468>",
475
+ "<SPECIAL_469>",
476
+ "<SPECIAL_470>",
477
+ "<SPECIAL_471>",
478
+ "<SPECIAL_472>",
479
+ "<SPECIAL_473>",
480
+ "<SPECIAL_474>",
481
+ "<SPECIAL_475>",
482
+ "<SPECIAL_476>",
483
+ "<SPECIAL_477>",
484
+ "<SPECIAL_478>",
485
+ "<SPECIAL_479>",
486
+ "<SPECIAL_480>",
487
+ "<SPECIAL_481>",
488
+ "<SPECIAL_482>",
489
+ "<SPECIAL_483>",
490
+ "<SPECIAL_484>",
491
+ "<SPECIAL_485>",
492
+ "<SPECIAL_486>",
493
+ "<SPECIAL_487>",
494
+ "<SPECIAL_488>",
495
+ "<SPECIAL_489>",
496
+ "<SPECIAL_490>",
497
+ "<SPECIAL_491>",
498
+ "<SPECIAL_492>",
499
+ "<SPECIAL_493>",
500
+ "<SPECIAL_494>",
501
+ "<SPECIAL_495>",
502
+ "<SPECIAL_496>",
503
+ "<SPECIAL_497>",
504
+ "<SPECIAL_498>",
505
+ "<SPECIAL_499>",
506
+ "<SPECIAL_500>",
507
+ "<SPECIAL_501>",
508
+ "<SPECIAL_502>",
509
+ "<SPECIAL_503>",
510
+ "<SPECIAL_504>",
511
+ "<SPECIAL_505>",
512
+ "<SPECIAL_506>",
513
+ "<SPECIAL_507>",
514
+ "<SPECIAL_508>",
515
+ "<SPECIAL_509>",
516
+ "<SPECIAL_510>",
517
+ "<SPECIAL_511>",
518
+ "<SPECIAL_512>",
519
+ "<SPECIAL_513>",
520
+ "<SPECIAL_514>",
521
+ "<SPECIAL_515>",
522
+ "<SPECIAL_516>",
523
+ "<SPECIAL_517>",
524
+ "<SPECIAL_518>",
525
+ "<SPECIAL_519>",
526
+ "<SPECIAL_520>",
527
+ "<SPECIAL_521>",
528
+ "<SPECIAL_522>",
529
+ "<SPECIAL_523>",
530
+ "<SPECIAL_524>",
531
+ "<SPECIAL_525>",
532
+ "<SPECIAL_526>",
533
+ "<SPECIAL_527>",
534
+ "<SPECIAL_528>",
535
+ "<SPECIAL_529>",
536
+ "<SPECIAL_530>",
537
+ "<SPECIAL_531>",
538
+ "<SPECIAL_532>",
539
+ "<SPECIAL_533>",
540
+ "<SPECIAL_534>",
541
+ "<SPECIAL_535>",
542
+ "<SPECIAL_536>",
543
+ "<SPECIAL_537>",
544
+ "<SPECIAL_538>",
545
+ "<SPECIAL_539>",
546
+ "<SPECIAL_540>",
547
+ "<SPECIAL_541>",
548
+ "<SPECIAL_542>",
549
+ "<SPECIAL_543>",
550
+ "<SPECIAL_544>",
551
+ "<SPECIAL_545>",
552
+ "<SPECIAL_546>",
553
+ "<SPECIAL_547>",
554
+ "<SPECIAL_548>",
555
+ "<SPECIAL_549>",
556
+ "<SPECIAL_550>",
557
+ "<SPECIAL_551>",
558
+ "<SPECIAL_552>",
559
+ "<SPECIAL_553>",
560
+ "<SPECIAL_554>",
561
+ "<SPECIAL_555>",
562
+ "<SPECIAL_556>",
563
+ "<SPECIAL_557>",
564
+ "<SPECIAL_558>",
565
+ "<SPECIAL_559>",
566
+ "<SPECIAL_560>",
567
+ "<SPECIAL_561>",
568
+ "<SPECIAL_562>",
569
+ "<SPECIAL_563>",
570
+ "<SPECIAL_564>",
571
+ "<SPECIAL_565>",
572
+ "<SPECIAL_566>",
573
+ "<SPECIAL_567>",
574
+ "<SPECIAL_568>",
575
+ "<SPECIAL_569>",
576
+ "<SPECIAL_570>",
577
+ "<SPECIAL_571>",
578
+ "<SPECIAL_572>",
579
+ "<SPECIAL_573>",
580
+ "<SPECIAL_574>",
581
+ "<SPECIAL_575>",
582
+ "<SPECIAL_576>",
583
+ "<SPECIAL_577>",
584
+ "<SPECIAL_578>",
585
+ "<SPECIAL_579>",
586
+ "<SPECIAL_580>",
587
+ "<SPECIAL_581>",
588
+ "<SPECIAL_582>",
589
+ "<SPECIAL_583>",
590
+ "<SPECIAL_584>",
591
+ "<SPECIAL_585>",
592
+ "<SPECIAL_586>",
593
+ "<SPECIAL_587>",
594
+ "<SPECIAL_588>",
595
+ "<SPECIAL_589>",
596
+ "<SPECIAL_590>",
597
+ "<SPECIAL_591>",
598
+ "<SPECIAL_592>",
599
+ "<SPECIAL_593>",
600
+ "<SPECIAL_594>",
601
+ "<SPECIAL_595>",
602
+ "<SPECIAL_596>",
603
+ "<SPECIAL_597>",
604
+ "<SPECIAL_598>",
605
+ "<SPECIAL_599>",
606
+ "<SPECIAL_600>",
607
+ "<SPECIAL_601>",
608
+ "<SPECIAL_602>",
609
+ "<SPECIAL_603>",
610
+ "<SPECIAL_604>",
611
+ "<SPECIAL_605>",
612
+ "<SPECIAL_606>",
613
+ "<SPECIAL_607>",
614
+ "<SPECIAL_608>",
615
+ "<SPECIAL_609>",
616
+ "<SPECIAL_610>",
617
+ "<SPECIAL_611>",
618
+ "<SPECIAL_612>",
619
+ "<SPECIAL_613>",
620
+ "<SPECIAL_614>",
621
+ "<SPECIAL_615>",
622
+ "<SPECIAL_616>",
623
+ "<SPECIAL_617>",
624
+ "<SPECIAL_618>",
625
+ "<SPECIAL_619>",
626
+ "<SPECIAL_620>",
627
+ "<SPECIAL_621>",
628
+ "<SPECIAL_622>",
629
+ "<SPECIAL_623>",
630
+ "<SPECIAL_624>",
631
+ "<SPECIAL_625>",
632
+ "<SPECIAL_626>",
633
+ "<SPECIAL_627>",
634
+ "<SPECIAL_628>",
635
+ "<SPECIAL_629>",
636
+ "<SPECIAL_630>",
637
+ "<SPECIAL_631>",
638
+ "<SPECIAL_632>",
639
+ "<SPECIAL_633>",
640
+ "<SPECIAL_634>",
641
+ "<SPECIAL_635>",
642
+ "<SPECIAL_636>",
643
+ "<SPECIAL_637>",
644
+ "<SPECIAL_638>",
645
+ "<SPECIAL_639>",
646
+ "<SPECIAL_640>",
647
+ "<SPECIAL_641>",
648
+ "<SPECIAL_642>",
649
+ "<SPECIAL_643>",
650
+ "<SPECIAL_644>",
651
+ "<SPECIAL_645>",
652
+ "<SPECIAL_646>",
653
+ "<SPECIAL_647>",
654
+ "<SPECIAL_648>",
655
+ "<SPECIAL_649>",
656
+ "<SPECIAL_650>",
657
+ "<SPECIAL_651>",
658
+ "<SPECIAL_652>",
659
+ "<SPECIAL_653>",
660
+ "<SPECIAL_654>",
661
+ "<SPECIAL_655>",
662
+ "<SPECIAL_656>",
663
+ "<SPECIAL_657>",
664
+ "<SPECIAL_658>",
665
+ "<SPECIAL_659>",
666
+ "<SPECIAL_660>",
667
+ "<SPECIAL_661>",
668
+ "<SPECIAL_662>",
669
+ "<SPECIAL_663>",
670
+ "<SPECIAL_664>",
671
+ "<SPECIAL_665>",
672
+ "<SPECIAL_666>",
673
+ "<SPECIAL_667>",
674
+ "<SPECIAL_668>",
675
+ "<SPECIAL_669>",
676
+ "<SPECIAL_670>",
677
+ "<SPECIAL_671>",
678
+ "<SPECIAL_672>",
679
+ "<SPECIAL_673>",
680
+ "<SPECIAL_674>",
681
+ "<SPECIAL_675>",
682
+ "<SPECIAL_676>",
683
+ "<SPECIAL_677>",
684
+ "<SPECIAL_678>",
685
+ "<SPECIAL_679>",
686
+ "<SPECIAL_680>",
687
+ "<SPECIAL_681>",
688
+ "<SPECIAL_682>",
689
+ "<SPECIAL_683>",
690
+ "<SPECIAL_684>",
691
+ "<SPECIAL_685>",
692
+ "<SPECIAL_686>",
693
+ "<SPECIAL_687>",
694
+ "<SPECIAL_688>",
695
+ "<SPECIAL_689>",
696
+ "<SPECIAL_690>",
697
+ "<SPECIAL_691>",
698
+ "<SPECIAL_692>",
699
+ "<SPECIAL_693>",
700
+ "<SPECIAL_694>",
701
+ "<SPECIAL_695>",
702
+ "<SPECIAL_696>",
703
+ "<SPECIAL_697>",
704
+ "<SPECIAL_698>",
705
+ "<SPECIAL_699>",
706
+ "<SPECIAL_700>",
707
+ "<SPECIAL_701>",
708
+ "<SPECIAL_702>",
709
+ "<SPECIAL_703>",
710
+ "<SPECIAL_704>",
711
+ "<SPECIAL_705>",
712
+ "<SPECIAL_706>",
713
+ "<SPECIAL_707>",
714
+ "<SPECIAL_708>",
715
+ "<SPECIAL_709>",
716
+ "<SPECIAL_710>",
717
+ "<SPECIAL_711>",
718
+ "<SPECIAL_712>",
719
+ "<SPECIAL_713>",
720
+ "<SPECIAL_714>",
721
+ "<SPECIAL_715>",
722
+ "<SPECIAL_716>",
723
+ "<SPECIAL_717>",
724
+ "<SPECIAL_718>",
725
+ "<SPECIAL_719>",
726
+ "<SPECIAL_720>",
727
+ "<SPECIAL_721>",
728
+ "<SPECIAL_722>",
729
+ "<SPECIAL_723>",
730
+ "<SPECIAL_724>",
731
+ "<SPECIAL_725>",
732
+ "<SPECIAL_726>",
733
+ "<SPECIAL_727>",
734
+ "<SPECIAL_728>",
735
+ "<SPECIAL_729>",
736
+ "<SPECIAL_730>",
737
+ "<SPECIAL_731>",
738
+ "<SPECIAL_732>",
739
+ "<SPECIAL_733>",
740
+ "<SPECIAL_734>",
741
+ "<SPECIAL_735>",
742
+ "<SPECIAL_736>",
743
+ "<SPECIAL_737>",
744
+ "<SPECIAL_738>",
745
+ "<SPECIAL_739>",
746
+ "<SPECIAL_740>",
747
+ "<SPECIAL_741>",
748
+ "<SPECIAL_742>",
749
+ "<SPECIAL_743>",
750
+ "<SPECIAL_744>",
751
+ "<SPECIAL_745>",
752
+ "<SPECIAL_746>",
753
+ "<SPECIAL_747>",
754
+ "<SPECIAL_748>",
755
+ "<SPECIAL_749>",
756
+ "<SPECIAL_750>",
757
+ "<SPECIAL_751>",
758
+ "<SPECIAL_752>",
759
+ "<SPECIAL_753>",
760
+ "<SPECIAL_754>",
761
+ "<SPECIAL_755>",
762
+ "<SPECIAL_756>",
763
+ "<SPECIAL_757>",
764
+ "<SPECIAL_758>",
765
+ "<SPECIAL_759>",
766
+ "<SPECIAL_760>",
767
+ "<SPECIAL_761>",
768
+ "<SPECIAL_762>",
769
+ "<SPECIAL_763>",
770
+ "<SPECIAL_764>",
771
+ "<SPECIAL_765>",
772
+ "<SPECIAL_766>",
773
+ "<SPECIAL_767>",
774
+ "<SPECIAL_768>",
775
+ "<SPECIAL_769>",
776
+ "<SPECIAL_770>",
777
+ "<SPECIAL_771>",
778
+ "<SPECIAL_772>",
779
+ "<SPECIAL_773>",
780
+ "<SPECIAL_774>",
781
+ "<SPECIAL_775>",
782
+ "<SPECIAL_776>",
783
+ "<SPECIAL_777>",
784
+ "<SPECIAL_778>",
785
+ "<SPECIAL_779>",
786
+ "<SPECIAL_780>",
787
+ "<SPECIAL_781>",
788
+ "<SPECIAL_782>",
789
+ "<SPECIAL_783>",
790
+ "<SPECIAL_784>",
791
+ "<SPECIAL_785>",
792
+ "<SPECIAL_786>",
793
+ "<SPECIAL_787>",
794
+ "<SPECIAL_788>",
795
+ "<SPECIAL_789>",
796
+ "<SPECIAL_790>",
797
+ "<SPECIAL_791>",
798
+ "<SPECIAL_792>",
799
+ "<SPECIAL_793>",
800
+ "<SPECIAL_794>",
801
+ "<SPECIAL_795>",
802
+ "<SPECIAL_796>",
803
+ "<SPECIAL_797>",
804
+ "<SPECIAL_798>",
805
+ "<SPECIAL_799>",
806
+ "<SPECIAL_800>",
807
+ "<SPECIAL_801>",
808
+ "<SPECIAL_802>",
809
+ "<SPECIAL_803>",
810
+ "<SPECIAL_804>",
811
+ "<SPECIAL_805>",
812
+ "<SPECIAL_806>",
813
+ "<SPECIAL_807>",
814
+ "<SPECIAL_808>",
815
+ "<SPECIAL_809>",
816
+ "<SPECIAL_810>",
817
+ "<SPECIAL_811>",
818
+ "<SPECIAL_812>",
819
+ "<SPECIAL_813>",
820
+ "<SPECIAL_814>",
821
+ "<SPECIAL_815>",
822
+ "<SPECIAL_816>",
823
+ "<SPECIAL_817>",
824
+ "<SPECIAL_818>",
825
+ "<SPECIAL_819>",
826
+ "<SPECIAL_820>",
827
+ "<SPECIAL_821>",
828
+ "<SPECIAL_822>",
829
+ "<SPECIAL_823>",
830
+ "<SPECIAL_824>",
831
+ "<SPECIAL_825>",
832
+ "<SPECIAL_826>",
833
+ "<SPECIAL_827>",
834
+ "<SPECIAL_828>",
835
+ "<SPECIAL_829>",
836
+ "<SPECIAL_830>",
837
+ "<SPECIAL_831>",
838
+ "<SPECIAL_832>",
839
+ "<SPECIAL_833>",
840
+ "<SPECIAL_834>",
841
+ "<SPECIAL_835>",
842
+ "<SPECIAL_836>",
843
+ "<SPECIAL_837>",
844
+ "<SPECIAL_838>",
845
+ "<SPECIAL_839>",
846
+ "<SPECIAL_840>",
847
+ "<SPECIAL_841>",
848
+ "<SPECIAL_842>",
849
+ "<SPECIAL_843>",
850
+ "<SPECIAL_844>",
851
+ "<SPECIAL_845>",
852
+ "<SPECIAL_846>",
853
+ "<SPECIAL_847>",
854
+ "<SPECIAL_848>",
855
+ "<SPECIAL_849>",
856
+ "<SPECIAL_850>",
857
+ "<SPECIAL_851>",
858
+ "<SPECIAL_852>",
859
+ "<SPECIAL_853>",
860
+ "<SPECIAL_854>",
861
+ "<SPECIAL_855>",
862
+ "<SPECIAL_856>",
863
+ "<SPECIAL_857>",
864
+ "<SPECIAL_858>",
865
+ "<SPECIAL_859>",
866
+ "<SPECIAL_860>",
867
+ "<SPECIAL_861>",
868
+ "<SPECIAL_862>",
869
+ "<SPECIAL_863>",
870
+ "<SPECIAL_864>",
871
+ "<SPECIAL_865>",
872
+ "<SPECIAL_866>",
873
+ "<SPECIAL_867>",
874
+ "<SPECIAL_868>",
875
+ "<SPECIAL_869>",
876
+ "<SPECIAL_870>",
877
+ "<SPECIAL_871>",
878
+ "<SPECIAL_872>",
879
+ "<SPECIAL_873>",
880
+ "<SPECIAL_874>",
881
+ "<SPECIAL_875>",
882
+ "<SPECIAL_876>",
883
+ "<SPECIAL_877>",
884
+ "<SPECIAL_878>",
885
+ "<SPECIAL_879>",
886
+ "<SPECIAL_880>",
887
+ "<SPECIAL_881>",
888
+ "<SPECIAL_882>",
889
+ "<SPECIAL_883>",
890
+ "<SPECIAL_884>",
891
+ "<SPECIAL_885>",
892
+ "<SPECIAL_886>",
893
+ "<SPECIAL_887>",
894
+ "<SPECIAL_888>",
895
+ "<SPECIAL_889>",
896
+ "<SPECIAL_890>",
897
+ "<SPECIAL_891>",
898
+ "<SPECIAL_892>",
899
+ "<SPECIAL_893>",
900
+ "<SPECIAL_894>",
901
+ "<SPECIAL_895>",
902
+ "<SPECIAL_896>",
903
+ "<SPECIAL_897>",
904
+ "<SPECIAL_898>",
905
+ "<SPECIAL_899>",
906
+ "<SPECIAL_900>",
907
+ "<SPECIAL_901>",
908
+ "<SPECIAL_902>",
909
+ "<SPECIAL_903>",
910
+ "<SPECIAL_904>",
911
+ "<SPECIAL_905>",
912
+ "<SPECIAL_906>",
913
+ "<SPECIAL_907>",
914
+ "<SPECIAL_908>",
915
+ "<SPECIAL_909>",
916
+ "<SPECIAL_910>",
917
+ "<SPECIAL_911>",
918
+ "<SPECIAL_912>",
919
+ "<SPECIAL_913>",
920
+ "<SPECIAL_914>",
921
+ "<SPECIAL_915>",
922
+ "<SPECIAL_916>",
923
+ "<SPECIAL_917>",
924
+ "<SPECIAL_918>",
925
+ "<SPECIAL_919>",
926
+ "<SPECIAL_920>",
927
+ "<SPECIAL_921>",
928
+ "<SPECIAL_922>",
929
+ "<SPECIAL_923>",
930
+ "<SPECIAL_924>",
931
+ "<SPECIAL_925>",
932
+ "<SPECIAL_926>",
933
+ "<SPECIAL_927>",
934
+ "<SPECIAL_928>",
935
+ "<SPECIAL_929>",
936
+ "<SPECIAL_930>",
937
+ "<SPECIAL_931>",
938
+ "<SPECIAL_932>",
939
+ "<SPECIAL_933>",
940
+ "<SPECIAL_934>",
941
+ "<SPECIAL_935>",
942
+ "<SPECIAL_936>",
943
+ "<SPECIAL_937>",
944
+ "<SPECIAL_938>",
945
+ "<SPECIAL_939>",
946
+ "<SPECIAL_940>",
947
+ "<SPECIAL_941>",
948
+ "<SPECIAL_942>",
949
+ "<SPECIAL_943>",
950
+ "<SPECIAL_944>",
951
+ "<SPECIAL_945>",
952
+ "<SPECIAL_946>",
953
+ "<SPECIAL_947>",
954
+ "<SPECIAL_948>",
955
+ "<SPECIAL_949>",
956
+ "<SPECIAL_950>",
957
+ "<SPECIAL_951>",
958
+ "<SPECIAL_952>",
959
+ "<SPECIAL_953>",
960
+ "<SPECIAL_954>",
961
+ "<SPECIAL_955>",
962
+ "<SPECIAL_956>",
963
+ "<SPECIAL_957>",
964
+ "<SPECIAL_958>",
965
+ "<SPECIAL_959>",
966
+ "<SPECIAL_960>",
967
+ "<SPECIAL_961>",
968
+ "<SPECIAL_962>",
969
+ "<SPECIAL_963>",
970
+ "<SPECIAL_964>",
971
+ "<SPECIAL_965>",
972
+ "<SPECIAL_966>",
973
+ "<SPECIAL_967>",
974
+ "<SPECIAL_968>",
975
+ "<SPECIAL_969>",
976
+ "<SPECIAL_970>",
977
+ "<SPECIAL_971>",
978
+ "<SPECIAL_972>",
979
+ "<SPECIAL_973>",
980
+ "<SPECIAL_974>",
981
+ "<SPECIAL_975>",
982
+ "<SPECIAL_976>",
983
+ "<SPECIAL_977>",
984
+ "<SPECIAL_978>",
985
+ "<SPECIAL_979>",
986
+ "<SPECIAL_980>",
987
+ "<SPECIAL_981>",
988
+ "<SPECIAL_982>",
989
+ "<SPECIAL_983>",
990
+ "<SPECIAL_984>",
991
+ "<SPECIAL_985>",
992
+ "<SPECIAL_986>",
993
+ "<SPECIAL_987>",
994
+ "<SPECIAL_988>",
995
+ "<SPECIAL_989>",
996
+ "<SPECIAL_990>",
997
+ "<SPECIAL_991>",
998
+ "<SPECIAL_992>",
999
+ "<SPECIAL_993>",
1000
+ "<SPECIAL_994>",
1001
+ "<SPECIAL_995>",
1002
+ "<SPECIAL_996>",
1003
+ "<SPECIAL_997>",
1004
+ "<SPECIAL_998>",
1005
+ "<SPECIAL_999>"
1006
+ ],
1007
+ "is_local": true,
1008
+ "model_max_length": 1000000000000000019884624838656,
1009
+ "pad_token": "<pad>",
1010
+ "processor_class": "PixtralProcessor",
1011
+ "tokenizer_class": "TokenizersBackend",
1012
+ "unk_token": "<unk>"
1013
+ }
cpt_devstral_24B/checkpoints/checkpoint-500/trainer_state.json ADDED
@@ -0,0 +1,3614 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 500,
3
+ "best_metric": 0.40706494450569153,
4
+ "best_model_checkpoint": "runs/cpt_run_v1/checkpoints/checkpoint-500",
5
+ "epoch": 1.4579762989972653,
6
+ "eval_steps": 50,
7
+ "global_step": 500,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0029170464904284413,
14
+ "grad_norm": 1.1577509641647339,
15
+ "learning_rate": 0.0,
16
+ "loss": 0.9893555045127869,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 0.005834092980856883,
21
+ "grad_norm": 0.9491796493530273,
22
+ "learning_rate": 2.8985507246376816e-07,
23
+ "loss": 0.8791205883026123,
24
+ "step": 2
25
+ },
26
+ {
27
+ "epoch": 0.008751139471285323,
28
+ "grad_norm": 1.1600768566131592,
29
+ "learning_rate": 5.797101449275363e-07,
30
+ "loss": 0.9858248233795166,
31
+ "step": 3
32
+ },
33
+ {
34
+ "epoch": 0.011668185961713765,
35
+ "grad_norm": 1.2298306226730347,
36
+ "learning_rate": 8.695652173913044e-07,
37
+ "loss": 1.0516364574432373,
38
+ "step": 4
39
+ },
40
+ {
41
+ "epoch": 0.014585232452142206,
42
+ "grad_norm": 0.9520533680915833,
43
+ "learning_rate": 1.1594202898550726e-06,
44
+ "loss": 0.8392249345779419,
45
+ "step": 5
46
+ },
47
+ {
48
+ "epoch": 0.017502278942570646,
49
+ "grad_norm": 1.2451188564300537,
50
+ "learning_rate": 1.4492753623188408e-06,
51
+ "loss": 1.0955077409744263,
52
+ "step": 6
53
+ },
54
+ {
55
+ "epoch": 0.02041932543299909,
56
+ "grad_norm": 1.1123991012573242,
57
+ "learning_rate": 1.7391304347826088e-06,
58
+ "loss": 0.9201866388320923,
59
+ "step": 7
60
+ },
61
+ {
62
+ "epoch": 0.02333637192342753,
63
+ "grad_norm": 0.9283139705657959,
64
+ "learning_rate": 2.028985507246377e-06,
65
+ "loss": 0.9770950078964233,
66
+ "step": 8
67
+ },
68
+ {
69
+ "epoch": 0.02625341841385597,
70
+ "grad_norm": 0.9589216113090515,
71
+ "learning_rate": 2.3188405797101453e-06,
72
+ "loss": 0.9442565441131592,
73
+ "step": 9
74
+ },
75
+ {
76
+ "epoch": 0.02917046490428441,
77
+ "grad_norm": 0.8866703510284424,
78
+ "learning_rate": 2.6086956521739132e-06,
79
+ "loss": 0.9354464411735535,
80
+ "step": 10
81
+ },
82
+ {
83
+ "epoch": 0.03208751139471285,
84
+ "grad_norm": 0.7191241383552551,
85
+ "learning_rate": 2.8985507246376816e-06,
86
+ "loss": 0.7659736275672913,
87
+ "step": 11
88
+ },
89
+ {
90
+ "epoch": 0.03500455788514129,
91
+ "grad_norm": 0.9110142588615417,
92
+ "learning_rate": 3.188405797101449e-06,
93
+ "loss": 0.9319326877593994,
94
+ "step": 12
95
+ },
96
+ {
97
+ "epoch": 0.03792160437556973,
98
+ "grad_norm": 0.8754057288169861,
99
+ "learning_rate": 3.4782608695652175e-06,
100
+ "loss": 0.9819356203079224,
101
+ "step": 13
102
+ },
103
+ {
104
+ "epoch": 0.04083865086599818,
105
+ "grad_norm": 0.896181046962738,
106
+ "learning_rate": 3.768115942028986e-06,
107
+ "loss": 1.026316523551941,
108
+ "step": 14
109
+ },
110
+ {
111
+ "epoch": 0.04375569735642662,
112
+ "grad_norm": 0.6104832887649536,
113
+ "learning_rate": 4.057971014492754e-06,
114
+ "loss": 0.8427562713623047,
115
+ "step": 15
116
+ },
117
+ {
118
+ "epoch": 0.04667274384685506,
119
+ "grad_norm": 0.6529208421707153,
120
+ "learning_rate": 4.347826086956522e-06,
121
+ "loss": 0.8496565222740173,
122
+ "step": 16
123
+ },
124
+ {
125
+ "epoch": 0.0495897903372835,
126
+ "grad_norm": 0.6319335699081421,
127
+ "learning_rate": 4.637681159420291e-06,
128
+ "loss": 0.9139047861099243,
129
+ "step": 17
130
+ },
131
+ {
132
+ "epoch": 0.05250683682771194,
133
+ "grad_norm": 0.7458649277687073,
134
+ "learning_rate": 4.927536231884059e-06,
135
+ "loss": 0.8867442011833191,
136
+ "step": 18
137
+ },
138
+ {
139
+ "epoch": 0.05542388331814038,
140
+ "grad_norm": 0.6179773211479187,
141
+ "learning_rate": 5.2173913043478265e-06,
142
+ "loss": 0.9579408168792725,
143
+ "step": 19
144
+ },
145
+ {
146
+ "epoch": 0.05834092980856882,
147
+ "grad_norm": 0.794481635093689,
148
+ "learning_rate": 5.507246376811595e-06,
149
+ "loss": 0.8736554980278015,
150
+ "step": 20
151
+ },
152
+ {
153
+ "epoch": 0.06125797629899726,
154
+ "grad_norm": 0.8356145620346069,
155
+ "learning_rate": 5.797101449275363e-06,
156
+ "loss": 0.9358762502670288,
157
+ "step": 21
158
+ },
159
+ {
160
+ "epoch": 0.0641750227894257,
161
+ "grad_norm": 0.5891932845115662,
162
+ "learning_rate": 6.086956521739132e-06,
163
+ "loss": 0.8972038626670837,
164
+ "step": 22
165
+ },
166
+ {
167
+ "epoch": 0.06709206927985414,
168
+ "grad_norm": 0.6931268572807312,
169
+ "learning_rate": 6.376811594202898e-06,
170
+ "loss": 0.9583507776260376,
171
+ "step": 23
172
+ },
173
+ {
174
+ "epoch": 0.07000911577028258,
175
+ "grad_norm": 0.7298229336738586,
176
+ "learning_rate": 6.666666666666667e-06,
177
+ "loss": 0.8119489550590515,
178
+ "step": 24
179
+ },
180
+ {
181
+ "epoch": 0.07292616226071102,
182
+ "grad_norm": 0.6419956684112549,
183
+ "learning_rate": 6.956521739130435e-06,
184
+ "loss": 0.9386100769042969,
185
+ "step": 25
186
+ },
187
+ {
188
+ "epoch": 0.07584320875113947,
189
+ "grad_norm": 0.7508338689804077,
190
+ "learning_rate": 7.246376811594203e-06,
191
+ "loss": 0.9272583723068237,
192
+ "step": 26
193
+ },
194
+ {
195
+ "epoch": 0.0787602552415679,
196
+ "grad_norm": 0.5848079919815063,
197
+ "learning_rate": 7.536231884057972e-06,
198
+ "loss": 0.8967856168746948,
199
+ "step": 27
200
+ },
201
+ {
202
+ "epoch": 0.08167730173199636,
203
+ "grad_norm": 0.7384837865829468,
204
+ "learning_rate": 7.82608695652174e-06,
205
+ "loss": 0.8696568012237549,
206
+ "step": 28
207
+ },
208
+ {
209
+ "epoch": 0.0845943482224248,
210
+ "grad_norm": 0.5069604516029358,
211
+ "learning_rate": 8.115942028985508e-06,
212
+ "loss": 0.9121193885803223,
213
+ "step": 29
214
+ },
215
+ {
216
+ "epoch": 0.08751139471285324,
217
+ "grad_norm": 0.833165168762207,
218
+ "learning_rate": 8.405797101449275e-06,
219
+ "loss": 0.8180589079856873,
220
+ "step": 30
221
+ },
222
+ {
223
+ "epoch": 0.09042844120328168,
224
+ "grad_norm": 0.6355920433998108,
225
+ "learning_rate": 8.695652173913044e-06,
226
+ "loss": 0.8640957474708557,
227
+ "step": 31
228
+ },
229
+ {
230
+ "epoch": 0.09334548769371012,
231
+ "grad_norm": 1.0429315567016602,
232
+ "learning_rate": 8.985507246376812e-06,
233
+ "loss": 0.9517915844917297,
234
+ "step": 32
235
+ },
236
+ {
237
+ "epoch": 0.09626253418413856,
238
+ "grad_norm": 0.5875154733657837,
239
+ "learning_rate": 9.275362318840581e-06,
240
+ "loss": 0.9443603754043579,
241
+ "step": 33
242
+ },
243
+ {
244
+ "epoch": 0.099179580674567,
245
+ "grad_norm": 1.9913769960403442,
246
+ "learning_rate": 9.565217391304349e-06,
247
+ "loss": 0.9510866403579712,
248
+ "step": 34
249
+ },
250
+ {
251
+ "epoch": 0.10209662716499544,
252
+ "grad_norm": 0.5310097932815552,
253
+ "learning_rate": 9.855072463768118e-06,
254
+ "loss": 0.8653419613838196,
255
+ "step": 35
256
+ },
257
+ {
258
+ "epoch": 0.10501367365542388,
259
+ "grad_norm": 0.624421238899231,
260
+ "learning_rate": 1.0144927536231885e-05,
261
+ "loss": 0.7941208481788635,
262
+ "step": 36
263
+ },
264
+ {
265
+ "epoch": 0.10793072014585232,
266
+ "grad_norm": 0.6314200758934021,
267
+ "learning_rate": 1.0434782608695653e-05,
268
+ "loss": 0.8931174278259277,
269
+ "step": 37
270
+ },
271
+ {
272
+ "epoch": 0.11084776663628076,
273
+ "grad_norm": 0.6272342205047607,
274
+ "learning_rate": 1.0724637681159422e-05,
275
+ "loss": 0.8978185057640076,
276
+ "step": 38
277
+ },
278
+ {
279
+ "epoch": 0.1137648131267092,
280
+ "grad_norm": 0.5711184740066528,
281
+ "learning_rate": 1.101449275362319e-05,
282
+ "loss": 0.808263897895813,
283
+ "step": 39
284
+ },
285
+ {
286
+ "epoch": 0.11668185961713765,
287
+ "grad_norm": 0.7581208944320679,
288
+ "learning_rate": 1.1304347826086957e-05,
289
+ "loss": 0.7456756830215454,
290
+ "step": 40
291
+ },
292
+ {
293
+ "epoch": 0.11959890610756609,
294
+ "grad_norm": 0.4989977180957794,
295
+ "learning_rate": 1.1594202898550726e-05,
296
+ "loss": 0.8273333311080933,
297
+ "step": 41
298
+ },
299
+ {
300
+ "epoch": 0.12251595259799453,
301
+ "grad_norm": 0.8602972626686096,
302
+ "learning_rate": 1.1884057971014494e-05,
303
+ "loss": 0.8514784574508667,
304
+ "step": 42
305
+ },
306
+ {
307
+ "epoch": 0.12543299908842298,
308
+ "grad_norm": 0.6918581128120422,
309
+ "learning_rate": 1.2173913043478263e-05,
310
+ "loss": 0.8182265162467957,
311
+ "step": 43
312
+ },
313
+ {
314
+ "epoch": 0.1283500455788514,
315
+ "grad_norm": 0.653099536895752,
316
+ "learning_rate": 1.2463768115942029e-05,
317
+ "loss": 0.8242791891098022,
318
+ "step": 44
319
+ },
320
+ {
321
+ "epoch": 0.13126709206927986,
322
+ "grad_norm": 0.7485584616661072,
323
+ "learning_rate": 1.2753623188405797e-05,
324
+ "loss": 0.8229591250419617,
325
+ "step": 45
326
+ },
327
+ {
328
+ "epoch": 0.1341841385597083,
329
+ "grad_norm": 0.6724833250045776,
330
+ "learning_rate": 1.3043478260869566e-05,
331
+ "loss": 0.8146833181381226,
332
+ "step": 46
333
+ },
334
+ {
335
+ "epoch": 0.13710118505013674,
336
+ "grad_norm": 0.857208251953125,
337
+ "learning_rate": 1.3333333333333333e-05,
338
+ "loss": 0.8154427409172058,
339
+ "step": 47
340
+ },
341
+ {
342
+ "epoch": 0.14001823154056517,
343
+ "grad_norm": 0.5559669137001038,
344
+ "learning_rate": 1.3623188405797103e-05,
345
+ "loss": 0.879005491733551,
346
+ "step": 48
347
+ },
348
+ {
349
+ "epoch": 0.14293527803099362,
350
+ "grad_norm": 0.5910897850990295,
351
+ "learning_rate": 1.391304347826087e-05,
352
+ "loss": 0.8148283362388611,
353
+ "step": 49
354
+ },
355
+ {
356
+ "epoch": 0.14585232452142205,
357
+ "grad_norm": 0.6478891372680664,
358
+ "learning_rate": 1.420289855072464e-05,
359
+ "loss": 0.8293006420135498,
360
+ "step": 50
361
+ },
362
+ {
363
+ "epoch": 0.14585232452142205,
364
+ "eval_loss": 0.7892261147499084,
365
+ "eval_runtime": 973.2157,
366
+ "eval_samples_per_second": 0.649,
367
+ "eval_steps_per_second": 0.649,
368
+ "step": 50
369
+ },
370
+ {
371
+ "epoch": 0.1487693710118505,
372
+ "grad_norm": 0.757882833480835,
373
+ "learning_rate": 1.4492753623188407e-05,
374
+ "loss": 0.8114852905273438,
375
+ "step": 51
376
+ },
377
+ {
378
+ "epoch": 0.15168641750227893,
379
+ "grad_norm": 0.8496116995811462,
380
+ "learning_rate": 1.4782608695652174e-05,
381
+ "loss": 0.7886185050010681,
382
+ "step": 52
383
+ },
384
+ {
385
+ "epoch": 0.15460346399270739,
386
+ "grad_norm": 0.6078857183456421,
387
+ "learning_rate": 1.5072463768115944e-05,
388
+ "loss": 0.7298170924186707,
389
+ "step": 53
390
+ },
391
+ {
392
+ "epoch": 0.1575205104831358,
393
+ "grad_norm": 0.5856835246086121,
394
+ "learning_rate": 1.536231884057971e-05,
395
+ "loss": 0.7407160997390747,
396
+ "step": 54
397
+ },
398
+ {
399
+ "epoch": 0.16043755697356427,
400
+ "grad_norm": 1.0533701181411743,
401
+ "learning_rate": 1.565217391304348e-05,
402
+ "loss": 0.7057831287384033,
403
+ "step": 55
404
+ },
405
+ {
406
+ "epoch": 0.16335460346399272,
407
+ "grad_norm": 0.8087610006332397,
408
+ "learning_rate": 1.5942028985507246e-05,
409
+ "loss": 0.7409019470214844,
410
+ "step": 56
411
+ },
412
+ {
413
+ "epoch": 0.16627164995442115,
414
+ "grad_norm": 0.629945695400238,
415
+ "learning_rate": 1.6231884057971015e-05,
416
+ "loss": 0.7768293023109436,
417
+ "step": 57
418
+ },
419
+ {
420
+ "epoch": 0.1691886964448496,
421
+ "grad_norm": 0.5187911987304688,
422
+ "learning_rate": 1.6521739130434785e-05,
423
+ "loss": 0.825718104839325,
424
+ "step": 58
425
+ },
426
+ {
427
+ "epoch": 0.17210574293527803,
428
+ "grad_norm": 0.5866358280181885,
429
+ "learning_rate": 1.681159420289855e-05,
430
+ "loss": 0.8575979471206665,
431
+ "step": 59
432
+ },
433
+ {
434
+ "epoch": 0.17502278942570648,
435
+ "grad_norm": 1.5098934173583984,
436
+ "learning_rate": 1.710144927536232e-05,
437
+ "loss": 0.8058848977088928,
438
+ "step": 60
439
+ },
440
+ {
441
+ "epoch": 0.1779398359161349,
442
+ "grad_norm": 0.6981958150863647,
443
+ "learning_rate": 1.739130434782609e-05,
444
+ "loss": 0.7640778422355652,
445
+ "step": 61
446
+ },
447
+ {
448
+ "epoch": 0.18085688240656336,
449
+ "grad_norm": 0.631349503993988,
450
+ "learning_rate": 1.7681159420289858e-05,
451
+ "loss": 0.7896331548690796,
452
+ "step": 62
453
+ },
454
+ {
455
+ "epoch": 0.1837739288969918,
456
+ "grad_norm": 0.6930747032165527,
457
+ "learning_rate": 1.7971014492753624e-05,
458
+ "loss": 0.6762524247169495,
459
+ "step": 63
460
+ },
461
+ {
462
+ "epoch": 0.18669097538742024,
463
+ "grad_norm": 0.599399209022522,
464
+ "learning_rate": 1.8260869565217393e-05,
465
+ "loss": 0.7285035848617554,
466
+ "step": 64
467
+ },
468
+ {
469
+ "epoch": 0.18960802187784867,
470
+ "grad_norm": 0.6194344758987427,
471
+ "learning_rate": 1.8550724637681162e-05,
472
+ "loss": 0.7682523131370544,
473
+ "step": 65
474
+ },
475
+ {
476
+ "epoch": 0.19252506836827712,
477
+ "grad_norm": 0.5691342949867249,
478
+ "learning_rate": 1.8840579710144928e-05,
479
+ "loss": 0.6791993379592896,
480
+ "step": 66
481
+ },
482
+ {
483
+ "epoch": 0.19544211485870555,
484
+ "grad_norm": 0.6257390379905701,
485
+ "learning_rate": 1.9130434782608697e-05,
486
+ "loss": 0.6744828224182129,
487
+ "step": 67
488
+ },
489
+ {
490
+ "epoch": 0.198359161349134,
491
+ "grad_norm": 0.5871018767356873,
492
+ "learning_rate": 1.9420289855072467e-05,
493
+ "loss": 0.7317330837249756,
494
+ "step": 68
495
+ },
496
+ {
497
+ "epoch": 0.20127620783956243,
498
+ "grad_norm": 1.0744612216949463,
499
+ "learning_rate": 1.9710144927536236e-05,
500
+ "loss": 0.6617178916931152,
501
+ "step": 69
502
+ },
503
+ {
504
+ "epoch": 0.2041932543299909,
505
+ "grad_norm": 0.675946831703186,
506
+ "learning_rate": 2e-05,
507
+ "loss": 0.7615712881088257,
508
+ "step": 70
509
+ },
510
+ {
511
+ "epoch": 0.2071103008204193,
512
+ "grad_norm": 0.7663411498069763,
513
+ "learning_rate": 1.9999870372100614e-05,
514
+ "loss": 0.7131291627883911,
515
+ "step": 71
516
+ },
517
+ {
518
+ "epoch": 0.21002734731084777,
519
+ "grad_norm": 0.6725395321846008,
520
+ "learning_rate": 1.9999481491763123e-05,
521
+ "loss": 0.7452989816665649,
522
+ "step": 72
523
+ },
524
+ {
525
+ "epoch": 0.21294439380127622,
526
+ "grad_norm": 0.6505664587020874,
527
+ "learning_rate": 1.9998833369069483e-05,
528
+ "loss": 0.7477136850357056,
529
+ "step": 73
530
+ },
531
+ {
532
+ "epoch": 0.21586144029170465,
533
+ "grad_norm": 0.7032860517501831,
534
+ "learning_rate": 1.9997926020822643e-05,
535
+ "loss": 0.6854275465011597,
536
+ "step": 74
537
+ },
538
+ {
539
+ "epoch": 0.2187784867821331,
540
+ "grad_norm": 0.645345151424408,
541
+ "learning_rate": 1.999675947054614e-05,
542
+ "loss": 0.7552425265312195,
543
+ "step": 75
544
+ },
545
+ {
546
+ "epoch": 0.22169553327256153,
547
+ "grad_norm": 0.6620492935180664,
548
+ "learning_rate": 1.9995333748483464e-05,
549
+ "loss": 0.7262853384017944,
550
+ "step": 76
551
+ },
552
+ {
553
+ "epoch": 0.22461257976298998,
554
+ "grad_norm": 0.6511455774307251,
555
+ "learning_rate": 1.9993648891597284e-05,
556
+ "loss": 0.7591732144355774,
557
+ "step": 77
558
+ },
559
+ {
560
+ "epoch": 0.2275296262534184,
561
+ "grad_norm": 0.6775254011154175,
562
+ "learning_rate": 1.9991704943568497e-05,
563
+ "loss": 0.7498704195022583,
564
+ "step": 78
565
+ },
566
+ {
567
+ "epoch": 0.23044667274384686,
568
+ "grad_norm": 0.8199896216392517,
569
+ "learning_rate": 1.9989501954795076e-05,
570
+ "loss": 0.7238684296607971,
571
+ "step": 79
572
+ },
573
+ {
574
+ "epoch": 0.2333637192342753,
575
+ "grad_norm": 0.8197569847106934,
576
+ "learning_rate": 1.998703998239079e-05,
577
+ "loss": 0.7028778195381165,
578
+ "step": 80
579
+ },
580
+ {
581
+ "epoch": 0.23628076572470375,
582
+ "grad_norm": 0.6602625250816345,
583
+ "learning_rate": 1.9984319090183692e-05,
584
+ "loss": 0.8842703104019165,
585
+ "step": 81
586
+ },
587
+ {
588
+ "epoch": 0.23919781221513217,
589
+ "grad_norm": 0.9587129354476929,
590
+ "learning_rate": 1.99813393487145e-05,
591
+ "loss": 0.732614278793335,
592
+ "step": 82
593
+ },
594
+ {
595
+ "epoch": 0.24211485870556063,
596
+ "grad_norm": 0.6822189092636108,
597
+ "learning_rate": 1.997810083523473e-05,
598
+ "loss": 0.7544928193092346,
599
+ "step": 83
600
+ },
601
+ {
602
+ "epoch": 0.24503190519598905,
603
+ "grad_norm": 0.8980082869529724,
604
+ "learning_rate": 1.9974603633704726e-05,
605
+ "loss": 0.6704054474830627,
606
+ "step": 84
607
+ },
608
+ {
609
+ "epoch": 0.2479489516864175,
610
+ "grad_norm": 0.7413425445556641,
611
+ "learning_rate": 1.9970847834791472e-05,
612
+ "loss": 0.693661093711853,
613
+ "step": 85
614
+ },
615
+ {
616
+ "epoch": 0.25086599817684596,
617
+ "grad_norm": 0.8314999341964722,
618
+ "learning_rate": 1.9966833535866223e-05,
619
+ "loss": 0.667654275894165,
620
+ "step": 86
621
+ },
622
+ {
623
+ "epoch": 0.25378304466727436,
624
+ "grad_norm": 0.7972444891929626,
625
+ "learning_rate": 1.9962560841002013e-05,
626
+ "loss": 0.8403134942054749,
627
+ "step": 87
628
+ },
629
+ {
630
+ "epoch": 0.2567000911577028,
631
+ "grad_norm": 0.8519951701164246,
632
+ "learning_rate": 1.995802986097093e-05,
633
+ "loss": 0.6897370219230652,
634
+ "step": 88
635
+ },
636
+ {
637
+ "epoch": 0.25961713764813127,
638
+ "grad_norm": 0.8268933892250061,
639
+ "learning_rate": 1.995324071324126e-05,
640
+ "loss": 0.6690632700920105,
641
+ "step": 89
642
+ },
643
+ {
644
+ "epoch": 0.2625341841385597,
645
+ "grad_norm": 0.7133983969688416,
646
+ "learning_rate": 1.9948193521974436e-05,
647
+ "loss": 0.6314147114753723,
648
+ "step": 90
649
+ },
650
+ {
651
+ "epoch": 0.2654512306289881,
652
+ "grad_norm": 0.889302134513855,
653
+ "learning_rate": 1.9942888418021814e-05,
654
+ "loss": 0.7389825582504272,
655
+ "step": 91
656
+ },
657
+ {
658
+ "epoch": 0.2683682771194166,
659
+ "grad_norm": 0.7022432088851929,
660
+ "learning_rate": 1.99373255389213e-05,
661
+ "loss": 0.6916261911392212,
662
+ "step": 92
663
+ },
664
+ {
665
+ "epoch": 0.27128532360984503,
666
+ "grad_norm": 0.696432888507843,
667
+ "learning_rate": 1.9931505028893748e-05,
668
+ "loss": 0.6908476948738098,
669
+ "step": 93
670
+ },
671
+ {
672
+ "epoch": 0.2742023701002735,
673
+ "grad_norm": 0.7667419910430908,
674
+ "learning_rate": 1.9925427038839267e-05,
675
+ "loss": 0.6500837206840515,
676
+ "step": 94
677
+ },
678
+ {
679
+ "epoch": 0.27711941659070194,
680
+ "grad_norm": 0.6974894404411316,
681
+ "learning_rate": 1.9919091726333265e-05,
682
+ "loss": 0.7059191465377808,
683
+ "step": 95
684
+ },
685
+ {
686
+ "epoch": 0.28003646308113034,
687
+ "grad_norm": 0.7047077417373657,
688
+ "learning_rate": 1.9912499255622397e-05,
689
+ "loss": 0.6287837624549866,
690
+ "step": 96
691
+ },
692
+ {
693
+ "epoch": 0.2829535095715588,
694
+ "grad_norm": 0.7729557156562805,
695
+ "learning_rate": 1.990564979762029e-05,
696
+ "loss": 0.6738612055778503,
697
+ "step": 97
698
+ },
699
+ {
700
+ "epoch": 0.28587055606198725,
701
+ "grad_norm": 0.7020529508590698,
702
+ "learning_rate": 1.989854352990311e-05,
703
+ "loss": 0.662042498588562,
704
+ "step": 98
705
+ },
706
+ {
707
+ "epoch": 0.2887876025524157,
708
+ "grad_norm": 0.7369800209999084,
709
+ "learning_rate": 1.9891180636704975e-05,
710
+ "loss": 0.6246830821037292,
711
+ "step": 99
712
+ },
713
+ {
714
+ "epoch": 0.2917046490428441,
715
+ "grad_norm": 0.7412623167037964,
716
+ "learning_rate": 1.9883561308913154e-05,
717
+ "loss": 0.6623879075050354,
718
+ "step": 100
719
+ },
720
+ {
721
+ "epoch": 0.2917046490428441,
722
+ "eval_loss": 0.6552971005439758,
723
+ "eval_runtime": 966.7072,
724
+ "eval_samples_per_second": 0.654,
725
+ "eval_steps_per_second": 0.654,
726
+ "step": 100
727
+ },
728
+ {
729
+ "epoch": 0.29462169553327255,
730
+ "grad_norm": 0.8428792953491211,
731
+ "learning_rate": 1.987568574406314e-05,
732
+ "loss": 0.6312171816825867,
733
+ "step": 101
734
+ },
735
+ {
736
+ "epoch": 0.297538742023701,
737
+ "grad_norm": 0.6948133707046509,
738
+ "learning_rate": 1.9867554146333517e-05,
739
+ "loss": 0.6266146898269653,
740
+ "step": 102
741
+ },
742
+ {
743
+ "epoch": 0.30045578851412946,
744
+ "grad_norm": 1.3897597789764404,
745
+ "learning_rate": 1.985916672654068e-05,
746
+ "loss": 0.6669265031814575,
747
+ "step": 103
748
+ },
749
+ {
750
+ "epoch": 0.30337283500455786,
751
+ "grad_norm": 0.8838400840759277,
752
+ "learning_rate": 1.985052370213334e-05,
753
+ "loss": 0.6601086854934692,
754
+ "step": 104
755
+ },
756
+ {
757
+ "epoch": 0.3062898814949863,
758
+ "grad_norm": 0.8471395373344421,
759
+ "learning_rate": 1.9841625297186925e-05,
760
+ "loss": 0.5984431505203247,
761
+ "step": 105
762
+ },
763
+ {
764
+ "epoch": 0.30920692798541477,
765
+ "grad_norm": 0.8940042853355408,
766
+ "learning_rate": 1.983247174239774e-05,
767
+ "loss": 0.7223822474479675,
768
+ "step": 106
769
+ },
770
+ {
771
+ "epoch": 0.3121239744758432,
772
+ "grad_norm": 0.7833696603775024,
773
+ "learning_rate": 1.9823063275076998e-05,
774
+ "loss": 0.6868705749511719,
775
+ "step": 107
776
+ },
777
+ {
778
+ "epoch": 0.3150410209662716,
779
+ "grad_norm": 0.8794649243354797,
780
+ "learning_rate": 1.9813400139144673e-05,
781
+ "loss": 0.6246675848960876,
782
+ "step": 108
783
+ },
784
+ {
785
+ "epoch": 0.3179580674567001,
786
+ "grad_norm": 0.8126057982444763,
787
+ "learning_rate": 1.9803482585123165e-05,
788
+ "loss": 0.5908697247505188,
789
+ "step": 109
790
+ },
791
+ {
792
+ "epoch": 0.32087511394712853,
793
+ "grad_norm": 0.7947676777839661,
794
+ "learning_rate": 1.979331087013082e-05,
795
+ "loss": 0.5751246809959412,
796
+ "step": 110
797
+ },
798
+ {
799
+ "epoch": 0.323792160437557,
800
+ "grad_norm": 0.713545560836792,
801
+ "learning_rate": 1.978288525787524e-05,
802
+ "loss": 0.6081106066703796,
803
+ "step": 111
804
+ },
805
+ {
806
+ "epoch": 0.32670920692798544,
807
+ "grad_norm": 1.011828064918518,
808
+ "learning_rate": 1.977220601864647e-05,
809
+ "loss": 0.7039169669151306,
810
+ "step": 112
811
+ },
812
+ {
813
+ "epoch": 0.32962625341841384,
814
+ "grad_norm": 0.730570912361145,
815
+ "learning_rate": 1.9761273429309982e-05,
816
+ "loss": 0.6140255928039551,
817
+ "step": 113
818
+ },
819
+ {
820
+ "epoch": 0.3325432999088423,
821
+ "grad_norm": 1.059688687324524,
822
+ "learning_rate": 1.9750087773299492e-05,
823
+ "loss": 0.648114025592804,
824
+ "step": 114
825
+ },
826
+ {
827
+ "epoch": 0.33546034639927075,
828
+ "grad_norm": 0.9336895942687988,
829
+ "learning_rate": 1.973864934060962e-05,
830
+ "loss": 0.622555673122406,
831
+ "step": 115
832
+ },
833
+ {
834
+ "epoch": 0.3383773928896992,
835
+ "grad_norm": 0.7195945978164673,
836
+ "learning_rate": 1.9726958427788367e-05,
837
+ "loss": 0.70485520362854,
838
+ "step": 116
839
+ },
840
+ {
841
+ "epoch": 0.3412944393801276,
842
+ "grad_norm": 0.8101872801780701,
843
+ "learning_rate": 1.971501533792942e-05,
844
+ "loss": 0.6958848834037781,
845
+ "step": 117
846
+ },
847
+ {
848
+ "epoch": 0.34421148587055606,
849
+ "grad_norm": 1.6075212955474854,
850
+ "learning_rate": 1.970282038066432e-05,
851
+ "loss": 0.6021550893783569,
852
+ "step": 118
853
+ },
854
+ {
855
+ "epoch": 0.3471285323609845,
856
+ "grad_norm": 0.7881433963775635,
857
+ "learning_rate": 1.9690373872154396e-05,
858
+ "loss": 0.6449777483940125,
859
+ "step": 119
860
+ },
861
+ {
862
+ "epoch": 0.35004557885141296,
863
+ "grad_norm": 1.014639973640442,
864
+ "learning_rate": 1.9677676135082606e-05,
865
+ "loss": 0.5939379930496216,
866
+ "step": 120
867
+ },
868
+ {
869
+ "epoch": 0.35296262534184136,
870
+ "grad_norm": 0.8198449611663818,
871
+ "learning_rate": 1.9664727498645144e-05,
872
+ "loss": 0.6210286617279053,
873
+ "step": 121
874
+ },
875
+ {
876
+ "epoch": 0.3558796718322698,
877
+ "grad_norm": 1.0194576978683472,
878
+ "learning_rate": 1.9651528298542918e-05,
879
+ "loss": 0.624247670173645,
880
+ "step": 122
881
+ },
882
+ {
883
+ "epoch": 0.35879671832269827,
884
+ "grad_norm": 0.7963470220565796,
885
+ "learning_rate": 1.9638078876972842e-05,
886
+ "loss": 0.6479315757751465,
887
+ "step": 123
888
+ },
889
+ {
890
+ "epoch": 0.3617137648131267,
891
+ "grad_norm": 0.9007541537284851,
892
+ "learning_rate": 1.9624379582618976e-05,
893
+ "loss": 0.6131505370140076,
894
+ "step": 124
895
+ },
896
+ {
897
+ "epoch": 0.3646308113035551,
898
+ "grad_norm": 0.8712120056152344,
899
+ "learning_rate": 1.9610430770643464e-05,
900
+ "loss": 0.6249448657035828,
901
+ "step": 125
902
+ },
903
+ {
904
+ "epoch": 0.3675478577939836,
905
+ "grad_norm": 1.1482540369033813,
906
+ "learning_rate": 1.9596232802677347e-05,
907
+ "loss": 0.5844688415527344,
908
+ "step": 126
909
+ },
910
+ {
911
+ "epoch": 0.37046490428441203,
912
+ "grad_norm": 0.8662379384040833,
913
+ "learning_rate": 1.9581786046811175e-05,
914
+ "loss": 0.6573485732078552,
915
+ "step": 127
916
+ },
917
+ {
918
+ "epoch": 0.3733819507748405,
919
+ "grad_norm": 0.8191388845443726,
920
+ "learning_rate": 1.9567090877585477e-05,
921
+ "loss": 0.5896862745285034,
922
+ "step": 128
923
+ },
924
+ {
925
+ "epoch": 0.37629899726526894,
926
+ "grad_norm": 1.0187078714370728,
927
+ "learning_rate": 1.955214767598103e-05,
928
+ "loss": 0.613490879535675,
929
+ "step": 129
930
+ },
931
+ {
932
+ "epoch": 0.37921604375569734,
933
+ "grad_norm": 0.8444119691848755,
934
+ "learning_rate": 1.953695682940901e-05,
935
+ "loss": 0.727687656879425,
936
+ "step": 130
937
+ },
938
+ {
939
+ "epoch": 0.3821330902461258,
940
+ "grad_norm": 0.74753737449646,
941
+ "learning_rate": 1.9521518731700913e-05,
942
+ "loss": 0.6102436780929565,
943
+ "step": 131
944
+ },
945
+ {
946
+ "epoch": 0.38505013673655425,
947
+ "grad_norm": 1.0166202783584595,
948
+ "learning_rate": 1.9505833783098378e-05,
949
+ "loss": 0.6244844198226929,
950
+ "step": 132
951
+ },
952
+ {
953
+ "epoch": 0.3879671832269827,
954
+ "grad_norm": 0.8175772428512573,
955
+ "learning_rate": 1.9489902390242793e-05,
956
+ "loss": 0.5939282178878784,
957
+ "step": 133
958
+ },
959
+ {
960
+ "epoch": 0.3908842297174111,
961
+ "grad_norm": 1.0177713632583618,
962
+ "learning_rate": 1.947372496616476e-05,
963
+ "loss": 0.6418229937553406,
964
+ "step": 134
965
+ },
966
+ {
967
+ "epoch": 0.39380127620783956,
968
+ "grad_norm": 0.8652453422546387,
969
+ "learning_rate": 1.9457301930273376e-05,
970
+ "loss": 0.5870395302772522,
971
+ "step": 135
972
+ },
973
+ {
974
+ "epoch": 0.396718322698268,
975
+ "grad_norm": 0.8378894925117493,
976
+ "learning_rate": 1.9440633708345365e-05,
977
+ "loss": 0.6480278372764587,
978
+ "step": 136
979
+ },
980
+ {
981
+ "epoch": 0.39963536918869647,
982
+ "grad_norm": 0.8303541541099548,
983
+ "learning_rate": 1.9423720732514052e-05,
984
+ "loss": 0.6191359758377075,
985
+ "step": 137
986
+ },
987
+ {
988
+ "epoch": 0.40255241567912486,
989
+ "grad_norm": 0.8576734662055969,
990
+ "learning_rate": 1.9406563441258145e-05,
991
+ "loss": 0.5696198344230652,
992
+ "step": 138
993
+ },
994
+ {
995
+ "epoch": 0.4054694621695533,
996
+ "grad_norm": 0.9558727145195007,
997
+ "learning_rate": 1.9389162279390362e-05,
998
+ "loss": 0.6177623271942139,
999
+ "step": 139
1000
+ },
1001
+ {
1002
+ "epoch": 0.4083865086599818,
1003
+ "grad_norm": 0.7046042084693909,
1004
+ "learning_rate": 1.9371517698045922e-05,
1005
+ "loss": 0.5836521983146667,
1006
+ "step": 140
1007
+ },
1008
+ {
1009
+ "epoch": 0.4113035551504102,
1010
+ "grad_norm": 1.0522717237472534,
1011
+ "learning_rate": 1.935363015467082e-05,
1012
+ "loss": 0.5728275775909424,
1013
+ "step": 141
1014
+ },
1015
+ {
1016
+ "epoch": 0.4142206016408386,
1017
+ "grad_norm": 0.9554787874221802,
1018
+ "learning_rate": 1.933550011301e-05,
1019
+ "loss": 0.632586658000946,
1020
+ "step": 142
1021
+ },
1022
+ {
1023
+ "epoch": 0.4171376481312671,
1024
+ "grad_norm": 0.8874214291572571,
1025
+ "learning_rate": 1.9317128043095293e-05,
1026
+ "loss": 0.5850118398666382,
1027
+ "step": 143
1028
+ },
1029
+ {
1030
+ "epoch": 0.42005469462169553,
1031
+ "grad_norm": 1.0708963871002197,
1032
+ "learning_rate": 1.9298514421233276e-05,
1033
+ "loss": 0.6260685324668884,
1034
+ "step": 144
1035
+ },
1036
+ {
1037
+ "epoch": 0.422971741112124,
1038
+ "grad_norm": 0.8135736584663391,
1039
+ "learning_rate": 1.9279659729992888e-05,
1040
+ "loss": 0.6031094193458557,
1041
+ "step": 145
1042
+ },
1043
+ {
1044
+ "epoch": 0.42588878760255244,
1045
+ "grad_norm": 0.7971774339675903,
1046
+ "learning_rate": 1.9260564458192926e-05,
1047
+ "loss": 0.6101322770118713,
1048
+ "step": 146
1049
+ },
1050
+ {
1051
+ "epoch": 0.42880583409298084,
1052
+ "grad_norm": 0.9374974966049194,
1053
+ "learning_rate": 1.9241229100889397e-05,
1054
+ "loss": 0.5836313366889954,
1055
+ "step": 147
1056
+ },
1057
+ {
1058
+ "epoch": 0.4317228805834093,
1059
+ "grad_norm": 0.8043425679206848,
1060
+ "learning_rate": 1.9221654159362636e-05,
1061
+ "loss": 0.6181215047836304,
1062
+ "step": 148
1063
+ },
1064
+ {
1065
+ "epoch": 0.43463992707383775,
1066
+ "grad_norm": 0.8923380374908447,
1067
+ "learning_rate": 1.920184014110436e-05,
1068
+ "loss": 0.6149677634239197,
1069
+ "step": 149
1070
+ },
1071
+ {
1072
+ "epoch": 0.4375569735642662,
1073
+ "grad_norm": 0.8908132314682007,
1074
+ "learning_rate": 1.918178755980449e-05,
1075
+ "loss": 0.5899742841720581,
1076
+ "step": 150
1077
+ },
1078
+ {
1079
+ "epoch": 0.4375569735642662,
1080
+ "eval_loss": 0.5903874635696411,
1081
+ "eval_runtime": 1186.9542,
1082
+ "eval_samples_per_second": 0.532,
1083
+ "eval_steps_per_second": 0.532,
1084
+ "step": 150
1085
+ },
1086
+ {
1087
+ "epoch": 0.4404740200546946,
1088
+ "grad_norm": 1.060531497001648,
1089
+ "learning_rate": 1.9161496935337808e-05,
1090
+ "loss": 0.5852696895599365,
1091
+ "step": 151
1092
+ },
1093
+ {
1094
+ "epoch": 0.44339106654512306,
1095
+ "grad_norm": 0.9723032712936401,
1096
+ "learning_rate": 1.914096879375053e-05,
1097
+ "loss": 0.5822056531906128,
1098
+ "step": 152
1099
+ },
1100
+ {
1101
+ "epoch": 0.4463081130355515,
1102
+ "grad_norm": 0.9519931674003601,
1103
+ "learning_rate": 1.912020366724663e-05,
1104
+ "loss": 0.6183493137359619,
1105
+ "step": 153
1106
+ },
1107
+ {
1108
+ "epoch": 0.44922515952597997,
1109
+ "grad_norm": 0.8282918334007263,
1110
+ "learning_rate": 1.9099202094174055e-05,
1111
+ "loss": 0.6229860782623291,
1112
+ "step": 154
1113
+ },
1114
+ {
1115
+ "epoch": 0.45214220601640837,
1116
+ "grad_norm": 0.9251292943954468,
1117
+ "learning_rate": 1.907796461901076e-05,
1118
+ "loss": 0.6552959680557251,
1119
+ "step": 155
1120
+ },
1121
+ {
1122
+ "epoch": 0.4550592525068368,
1123
+ "grad_norm": 1.0349540710449219,
1124
+ "learning_rate": 1.9056491792350606e-05,
1125
+ "loss": 0.6170098781585693,
1126
+ "step": 156
1127
+ },
1128
+ {
1129
+ "epoch": 0.4579762989972653,
1130
+ "grad_norm": 0.8720711469650269,
1131
+ "learning_rate": 1.9034784170889076e-05,
1132
+ "loss": 0.5870137810707092,
1133
+ "step": 157
1134
+ },
1135
+ {
1136
+ "epoch": 0.46089334548769373,
1137
+ "grad_norm": 1.0785977840423584,
1138
+ "learning_rate": 1.9012842317408843e-05,
1139
+ "loss": 0.5515124201774597,
1140
+ "step": 158
1141
+ },
1142
+ {
1143
+ "epoch": 0.4638103919781221,
1144
+ "grad_norm": 1.0634154081344604,
1145
+ "learning_rate": 1.8990666800765187e-05,
1146
+ "loss": 0.6073828339576721,
1147
+ "step": 159
1148
+ },
1149
+ {
1150
+ "epoch": 0.4667274384685506,
1151
+ "grad_norm": 0.8770879507064819,
1152
+ "learning_rate": 1.896825819587123e-05,
1153
+ "loss": 0.5960907936096191,
1154
+ "step": 160
1155
+ },
1156
+ {
1157
+ "epoch": 0.46964448495897904,
1158
+ "grad_norm": 1.1225898265838623,
1159
+ "learning_rate": 1.894561708368305e-05,
1160
+ "loss": 0.545990526676178,
1161
+ "step": 161
1162
+ },
1163
+ {
1164
+ "epoch": 0.4725615314494075,
1165
+ "grad_norm": 0.9373893141746521,
1166
+ "learning_rate": 1.8922744051184613e-05,
1167
+ "loss": 0.5566108822822571,
1168
+ "step": 162
1169
+ },
1170
+ {
1171
+ "epoch": 0.4754785779398359,
1172
+ "grad_norm": 1.5016087293624878,
1173
+ "learning_rate": 1.8899639691372545e-05,
1174
+ "loss": 0.558845043182373,
1175
+ "step": 163
1176
+ },
1177
+ {
1178
+ "epoch": 0.47839562443026434,
1179
+ "grad_norm": 0.903020977973938,
1180
+ "learning_rate": 1.8876304603240773e-05,
1181
+ "loss": 0.6824233531951904,
1182
+ "step": 164
1183
+ },
1184
+ {
1185
+ "epoch": 0.4813126709206928,
1186
+ "grad_norm": 0.8239623308181763,
1187
+ "learning_rate": 1.8852739391764993e-05,
1188
+ "loss": 0.5630610585212708,
1189
+ "step": 165
1190
+ },
1191
+ {
1192
+ "epoch": 0.48422971741112125,
1193
+ "grad_norm": 0.926069438457489,
1194
+ "learning_rate": 1.882894466788697e-05,
1195
+ "loss": 0.6211802363395691,
1196
+ "step": 166
1197
+ },
1198
+ {
1199
+ "epoch": 0.4871467639015497,
1200
+ "grad_norm": 1.0098828077316284,
1201
+ "learning_rate": 1.8804921048498722e-05,
1202
+ "loss": 0.5513257384300232,
1203
+ "step": 167
1204
+ },
1205
+ {
1206
+ "epoch": 0.4900638103919781,
1207
+ "grad_norm": 0.9228141903877258,
1208
+ "learning_rate": 1.8780669156426517e-05,
1209
+ "loss": 0.6197121739387512,
1210
+ "step": 168
1211
+ },
1212
+ {
1213
+ "epoch": 0.49298085688240656,
1214
+ "grad_norm": 1.0551754236221313,
1215
+ "learning_rate": 1.8756189620414712e-05,
1216
+ "loss": 0.5221806764602661,
1217
+ "step": 169
1218
+ },
1219
+ {
1220
+ "epoch": 0.495897903372835,
1221
+ "grad_norm": 0.9017496109008789,
1222
+ "learning_rate": 1.873148307510948e-05,
1223
+ "loss": 0.5766995549201965,
1224
+ "step": 170
1225
+ },
1226
+ {
1227
+ "epoch": 0.49881494986326347,
1228
+ "grad_norm": 0.9704970717430115,
1229
+ "learning_rate": 1.870655016104233e-05,
1230
+ "loss": 0.6514763832092285,
1231
+ "step": 171
1232
+ },
1233
+ {
1234
+ "epoch": 0.5017319963536919,
1235
+ "grad_norm": 0.9972712397575378,
1236
+ "learning_rate": 1.8681391524613518e-05,
1237
+ "loss": 0.5273895263671875,
1238
+ "step": 172
1239
+ },
1240
+ {
1241
+ "epoch": 0.5046490428441204,
1242
+ "grad_norm": 0.9473339319229126,
1243
+ "learning_rate": 1.8656007818075288e-05,
1244
+ "loss": 0.5548599362373352,
1245
+ "step": 173
1246
+ },
1247
+ {
1248
+ "epoch": 0.5075660893345487,
1249
+ "grad_norm": 1.2493574619293213,
1250
+ "learning_rate": 1.8630399699514944e-05,
1251
+ "loss": 0.5593586564064026,
1252
+ "step": 174
1253
+ },
1254
+ {
1255
+ "epoch": 0.5104831358249772,
1256
+ "grad_norm": 1.2766696214675903,
1257
+ "learning_rate": 1.860456783283781e-05,
1258
+ "loss": 0.6054630279541016,
1259
+ "step": 175
1260
+ },
1261
+ {
1262
+ "epoch": 0.5134001823154056,
1263
+ "grad_norm": 0.9555240869522095,
1264
+ "learning_rate": 1.857851288775002e-05,
1265
+ "loss": 0.508592963218689,
1266
+ "step": 176
1267
+ },
1268
+ {
1269
+ "epoch": 0.5163172288058341,
1270
+ "grad_norm": 1.260219931602478,
1271
+ "learning_rate": 1.8552235539741118e-05,
1272
+ "loss": 0.5532065629959106,
1273
+ "step": 177
1274
+ },
1275
+ {
1276
+ "epoch": 0.5192342752962625,
1277
+ "grad_norm": 1.1859954595565796,
1278
+ "learning_rate": 1.8525736470066595e-05,
1279
+ "loss": 0.5683344006538391,
1280
+ "step": 178
1281
+ },
1282
+ {
1283
+ "epoch": 0.522151321786691,
1284
+ "grad_norm": 1.3044344186782837,
1285
+ "learning_rate": 1.8499016365730203e-05,
1286
+ "loss": 0.5281959772109985,
1287
+ "step": 179
1288
+ },
1289
+ {
1290
+ "epoch": 0.5250683682771194,
1291
+ "grad_norm": 1.3049921989440918,
1292
+ "learning_rate": 1.8472075919466137e-05,
1293
+ "loss": 0.49621230363845825,
1294
+ "step": 180
1295
+ },
1296
+ {
1297
+ "epoch": 0.5279854147675479,
1298
+ "grad_norm": 1.0488537549972534,
1299
+ "learning_rate": 1.844491582972109e-05,
1300
+ "loss": 0.6194032430648804,
1301
+ "step": 181
1302
+ },
1303
+ {
1304
+ "epoch": 0.5309024612579762,
1305
+ "grad_norm": 1.5553455352783203,
1306
+ "learning_rate": 1.8417536800636138e-05,
1307
+ "loss": 0.5645846724510193,
1308
+ "step": 182
1309
+ },
1310
+ {
1311
+ "epoch": 0.5338195077484047,
1312
+ "grad_norm": 1.2673912048339844,
1313
+ "learning_rate": 1.8389939542028484e-05,
1314
+ "loss": 0.6267315745353699,
1315
+ "step": 183
1316
+ },
1317
+ {
1318
+ "epoch": 0.5367365542388332,
1319
+ "grad_norm": 1.0273847579956055,
1320
+ "learning_rate": 1.8362124769373064e-05,
1321
+ "loss": 0.5256403684616089,
1322
+ "step": 184
1323
+ },
1324
+ {
1325
+ "epoch": 0.5396536007292616,
1326
+ "grad_norm": 1.006093978881836,
1327
+ "learning_rate": 1.8334093203783986e-05,
1328
+ "loss": 0.5916382074356079,
1329
+ "step": 185
1330
+ },
1331
+ {
1332
+ "epoch": 0.5425706472196901,
1333
+ "grad_norm": 1.2740857601165771,
1334
+ "learning_rate": 1.8305845571995843e-05,
1335
+ "loss": 0.581648588180542,
1336
+ "step": 186
1337
+ },
1338
+ {
1339
+ "epoch": 0.5454876937101185,
1340
+ "grad_norm": 1.494248390197754,
1341
+ "learning_rate": 1.8277382606344872e-05,
1342
+ "loss": 0.4824523627758026,
1343
+ "step": 187
1344
+ },
1345
+ {
1346
+ "epoch": 0.548404740200547,
1347
+ "grad_norm": 1.1862496137619019,
1348
+ "learning_rate": 1.824870504474996e-05,
1349
+ "loss": 0.5531858205795288,
1350
+ "step": 188
1351
+ },
1352
+ {
1353
+ "epoch": 0.5513217866909754,
1354
+ "grad_norm": 3.503049373626709,
1355
+ "learning_rate": 1.8219813630693523e-05,
1356
+ "loss": 0.6308296918869019,
1357
+ "step": 189
1358
+ },
1359
+ {
1360
+ "epoch": 0.5542388331814039,
1361
+ "grad_norm": 1.7544710636138916,
1362
+ "learning_rate": 1.819070911320222e-05,
1363
+ "loss": 0.6146273016929626,
1364
+ "step": 190
1365
+ },
1366
+ {
1367
+ "epoch": 0.5571558796718322,
1368
+ "grad_norm": 1.3367774486541748,
1369
+ "learning_rate": 1.8161392246827546e-05,
1370
+ "loss": 0.5848966240882874,
1371
+ "step": 191
1372
+ },
1373
+ {
1374
+ "epoch": 0.5600729261622607,
1375
+ "grad_norm": 1.696418046951294,
1376
+ "learning_rate": 1.8131863791626263e-05,
1377
+ "loss": 0.6621730327606201,
1378
+ "step": 192
1379
+ },
1380
+ {
1381
+ "epoch": 0.5629899726526891,
1382
+ "grad_norm": 1.360052227973938,
1383
+ "learning_rate": 1.8102124513140694e-05,
1384
+ "loss": 0.5972204208374023,
1385
+ "step": 193
1386
+ },
1387
+ {
1388
+ "epoch": 0.5659070191431176,
1389
+ "grad_norm": 1.5376263856887817,
1390
+ "learning_rate": 1.807217518237888e-05,
1391
+ "loss": 0.4938785433769226,
1392
+ "step": 194
1393
+ },
1394
+ {
1395
+ "epoch": 0.568824065633546,
1396
+ "grad_norm": 1.2249681949615479,
1397
+ "learning_rate": 1.8042016575794585e-05,
1398
+ "loss": 0.5366095304489136,
1399
+ "step": 195
1400
+ },
1401
+ {
1402
+ "epoch": 0.5717411121239745,
1403
+ "grad_norm": 1.7868080139160156,
1404
+ "learning_rate": 1.8011649475267178e-05,
1405
+ "loss": 0.5116773843765259,
1406
+ "step": 196
1407
+ },
1408
+ {
1409
+ "epoch": 0.574658158614403,
1410
+ "grad_norm": 2.369993209838867,
1411
+ "learning_rate": 1.7981074668081345e-05,
1412
+ "loss": 0.49072742462158203,
1413
+ "step": 197
1414
+ },
1415
+ {
1416
+ "epoch": 0.5775752051048314,
1417
+ "grad_norm": 1.0168434381484985,
1418
+ "learning_rate": 1.7950292946906695e-05,
1419
+ "loss": 0.5691611170768738,
1420
+ "step": 198
1421
+ },
1422
+ {
1423
+ "epoch": 0.5804922515952597,
1424
+ "grad_norm": 1.2990851402282715,
1425
+ "learning_rate": 1.7919305109777195e-05,
1426
+ "loss": 0.5515039563179016,
1427
+ "step": 199
1428
+ },
1429
+ {
1430
+ "epoch": 0.5834092980856882,
1431
+ "grad_norm": 1.4859853982925415,
1432
+ "learning_rate": 1.7888111960070493e-05,
1433
+ "loss": 0.5017011165618896,
1434
+ "step": 200
1435
+ },
1436
+ {
1437
+ "epoch": 0.5834092980856882,
1438
+ "eval_loss": 0.5414339303970337,
1439
+ "eval_runtime": 1180.7894,
1440
+ "eval_samples_per_second": 0.535,
1441
+ "eval_steps_per_second": 0.535,
1442
+ "step": 200
1443
+ },
1444
+ {
1445
+ "epoch": 0.5863263445761167,
1446
+ "grad_norm": 1.0065829753875732,
1447
+ "learning_rate": 1.7856714306487088e-05,
1448
+ "loss": 0.5677731037139893,
1449
+ "step": 201
1450
+ },
1451
+ {
1452
+ "epoch": 0.5892433910665451,
1453
+ "grad_norm": 1.1727538108825684,
1454
+ "learning_rate": 1.7825112963029352e-05,
1455
+ "loss": 0.4525509476661682,
1456
+ "step": 202
1457
+ },
1458
+ {
1459
+ "epoch": 0.5921604375569736,
1460
+ "grad_norm": 1.3376752138137817,
1461
+ "learning_rate": 1.7793308748980437e-05,
1462
+ "loss": 0.5208959579467773,
1463
+ "step": 203
1464
+ },
1465
+ {
1466
+ "epoch": 0.595077484047402,
1467
+ "grad_norm": 0.9196159839630127,
1468
+ "learning_rate": 1.776130248888304e-05,
1469
+ "loss": 0.6033903360366821,
1470
+ "step": 204
1471
+ },
1472
+ {
1473
+ "epoch": 0.5979945305378305,
1474
+ "grad_norm": 1.0750919580459595,
1475
+ "learning_rate": 1.772909501251801e-05,
1476
+ "loss": 0.5449609160423279,
1477
+ "step": 205
1478
+ },
1479
+ {
1480
+ "epoch": 0.6009115770282589,
1481
+ "grad_norm": 1.2459467649459839,
1482
+ "learning_rate": 1.769668715488285e-05,
1483
+ "loss": 0.5685338377952576,
1484
+ "step": 206
1485
+ },
1486
+ {
1487
+ "epoch": 0.6038286235186874,
1488
+ "grad_norm": 1.1690552234649658,
1489
+ "learning_rate": 1.766407975617006e-05,
1490
+ "loss": 0.5240382552146912,
1491
+ "step": 207
1492
+ },
1493
+ {
1494
+ "epoch": 0.6067456700091157,
1495
+ "grad_norm": 1.0816599130630493,
1496
+ "learning_rate": 1.7631273661745362e-05,
1497
+ "loss": 0.6802893877029419,
1498
+ "step": 208
1499
+ },
1500
+ {
1501
+ "epoch": 0.6096627164995442,
1502
+ "grad_norm": 1.3662947416305542,
1503
+ "learning_rate": 1.7598269722125775e-05,
1504
+ "loss": 0.48193931579589844,
1505
+ "step": 209
1506
+ },
1507
+ {
1508
+ "epoch": 0.6125797629899726,
1509
+ "grad_norm": 0.9364766478538513,
1510
+ "learning_rate": 1.7565068792957576e-05,
1511
+ "loss": 0.5675849914550781,
1512
+ "step": 210
1513
+ },
1514
+ {
1515
+ "epoch": 0.6154968094804011,
1516
+ "grad_norm": 1.123828411102295,
1517
+ "learning_rate": 1.75316717349941e-05,
1518
+ "loss": 0.5474762916564941,
1519
+ "step": 211
1520
+ },
1521
+ {
1522
+ "epoch": 0.6184138559708295,
1523
+ "grad_norm": 1.1924363374710083,
1524
+ "learning_rate": 1.749807941407345e-05,
1525
+ "loss": 0.4918654263019562,
1526
+ "step": 212
1527
+ },
1528
+ {
1529
+ "epoch": 0.621330902461258,
1530
+ "grad_norm": 1.101293921470642,
1531
+ "learning_rate": 1.7464292701096014e-05,
1532
+ "loss": 0.5742691159248352,
1533
+ "step": 213
1534
+ },
1535
+ {
1536
+ "epoch": 0.6242479489516864,
1537
+ "grad_norm": 1.7374963760375977,
1538
+ "learning_rate": 1.7430312472001928e-05,
1539
+ "loss": 0.5828965902328491,
1540
+ "step": 214
1541
+ },
1542
+ {
1543
+ "epoch": 0.6271649954421149,
1544
+ "grad_norm": 1.3195666074752808,
1545
+ "learning_rate": 1.739613960774833e-05,
1546
+ "loss": 0.5265159010887146,
1547
+ "step": 215
1548
+ },
1549
+ {
1550
+ "epoch": 0.6300820419325432,
1551
+ "grad_norm": 1.254686713218689,
1552
+ "learning_rate": 1.7361774994286545e-05,
1553
+ "loss": 0.4929371476173401,
1554
+ "step": 216
1555
+ },
1556
+ {
1557
+ "epoch": 0.6329990884229717,
1558
+ "grad_norm": 1.1476380825042725,
1559
+ "learning_rate": 1.7327219522539102e-05,
1560
+ "loss": 0.5060417652130127,
1561
+ "step": 217
1562
+ },
1563
+ {
1564
+ "epoch": 0.6359161349134002,
1565
+ "grad_norm": 1.0914150476455688,
1566
+ "learning_rate": 1.7292474088376643e-05,
1567
+ "loss": 0.504043698310852,
1568
+ "step": 218
1569
+ },
1570
+ {
1571
+ "epoch": 0.6388331814038286,
1572
+ "grad_norm": 1.1339508295059204,
1573
+ "learning_rate": 1.7257539592594698e-05,
1574
+ "loss": 0.4797310531139374,
1575
+ "step": 219
1576
+ },
1577
+ {
1578
+ "epoch": 0.6417502278942571,
1579
+ "grad_norm": 1.0805399417877197,
1580
+ "learning_rate": 1.722241694089033e-05,
1581
+ "loss": 0.5878555178642273,
1582
+ "step": 220
1583
+ },
1584
+ {
1585
+ "epoch": 0.6446672743846855,
1586
+ "grad_norm": 1.8615056276321411,
1587
+ "learning_rate": 1.718710704383865e-05,
1588
+ "loss": 0.5005823969841003,
1589
+ "step": 221
1590
+ },
1591
+ {
1592
+ "epoch": 0.647584320875114,
1593
+ "grad_norm": 1.1445401906967163,
1594
+ "learning_rate": 1.7151610816869214e-05,
1595
+ "loss": 0.4949319064617157,
1596
+ "step": 222
1597
+ },
1598
+ {
1599
+ "epoch": 0.6505013673655424,
1600
+ "grad_norm": 0.9726515412330627,
1601
+ "learning_rate": 1.711592918024229e-05,
1602
+ "loss": 0.5073204040527344,
1603
+ "step": 223
1604
+ },
1605
+ {
1606
+ "epoch": 0.6534184138559709,
1607
+ "grad_norm": 1.4491140842437744,
1608
+ "learning_rate": 1.7080063059024998e-05,
1609
+ "loss": 0.47885262966156006,
1610
+ "step": 224
1611
+ },
1612
+ {
1613
+ "epoch": 0.6563354603463992,
1614
+ "grad_norm": 1.0070592164993286,
1615
+ "learning_rate": 1.7044013383067327e-05,
1616
+ "loss": 0.5775837898254395,
1617
+ "step": 225
1618
+ },
1619
+ {
1620
+ "epoch": 0.6592525068368277,
1621
+ "grad_norm": 0.966221272945404,
1622
+ "learning_rate": 1.7007781086978037e-05,
1623
+ "loss": 0.5050399899482727,
1624
+ "step": 226
1625
+ },
1626
+ {
1627
+ "epoch": 0.6621695533272561,
1628
+ "grad_norm": 0.9808815121650696,
1629
+ "learning_rate": 1.6971367110100407e-05,
1630
+ "loss": 0.5737045407295227,
1631
+ "step": 227
1632
+ },
1633
+ {
1634
+ "epoch": 0.6650865998176846,
1635
+ "grad_norm": 1.0158127546310425,
1636
+ "learning_rate": 1.6934772396487906e-05,
1637
+ "loss": 0.48077821731567383,
1638
+ "step": 228
1639
+ },
1640
+ {
1641
+ "epoch": 0.668003646308113,
1642
+ "grad_norm": 1.32015860080719,
1643
+ "learning_rate": 1.6897997894879706e-05,
1644
+ "loss": 0.5614925026893616,
1645
+ "step": 229
1646
+ },
1647
+ {
1648
+ "epoch": 0.6709206927985415,
1649
+ "grad_norm": 1.1055903434753418,
1650
+ "learning_rate": 1.686104455867608e-05,
1651
+ "loss": 0.4970760643482208,
1652
+ "step": 230
1653
+ },
1654
+ {
1655
+ "epoch": 0.67383773928897,
1656
+ "grad_norm": 1.0804500579833984,
1657
+ "learning_rate": 1.682391334591371e-05,
1658
+ "loss": 0.5540452003479004,
1659
+ "step": 231
1660
+ },
1661
+ {
1662
+ "epoch": 0.6767547857793984,
1663
+ "grad_norm": 1.1906245946884155,
1664
+ "learning_rate": 1.6786605219240807e-05,
1665
+ "loss": 0.5778501033782959,
1666
+ "step": 232
1667
+ },
1668
+ {
1669
+ "epoch": 0.6796718322698267,
1670
+ "grad_norm": 0.9758645296096802,
1671
+ "learning_rate": 1.6749121145892192e-05,
1672
+ "loss": 0.49073565006256104,
1673
+ "step": 233
1674
+ },
1675
+ {
1676
+ "epoch": 0.6825888787602552,
1677
+ "grad_norm": 1.1678364276885986,
1678
+ "learning_rate": 1.6711462097664207e-05,
1679
+ "loss": 0.4828741252422333,
1680
+ "step": 234
1681
+ },
1682
+ {
1683
+ "epoch": 0.6855059252506837,
1684
+ "grad_norm": 1.148301362991333,
1685
+ "learning_rate": 1.6673629050889507e-05,
1686
+ "loss": 0.5143818855285645,
1687
+ "step": 235
1688
+ },
1689
+ {
1690
+ "epoch": 0.6884229717411121,
1691
+ "grad_norm": 1.005898356437683,
1692
+ "learning_rate": 1.6635622986411776e-05,
1693
+ "loss": 0.5301160216331482,
1694
+ "step": 236
1695
+ },
1696
+ {
1697
+ "epoch": 0.6913400182315406,
1698
+ "grad_norm": 1.2227320671081543,
1699
+ "learning_rate": 1.659744488956027e-05,
1700
+ "loss": 0.4800386130809784,
1701
+ "step": 237
1702
+ },
1703
+ {
1704
+ "epoch": 0.694257064721969,
1705
+ "grad_norm": 0.986456573009491,
1706
+ "learning_rate": 1.6559095750124296e-05,
1707
+ "loss": 0.5098081827163696,
1708
+ "step": 238
1709
+ },
1710
+ {
1711
+ "epoch": 0.6971741112123975,
1712
+ "grad_norm": 1.1474376916885376,
1713
+ "learning_rate": 1.6520576562327518e-05,
1714
+ "loss": 0.5147273540496826,
1715
+ "step": 239
1716
+ },
1717
+ {
1718
+ "epoch": 0.7000911577028259,
1719
+ "grad_norm": 1.10917067527771,
1720
+ "learning_rate": 1.6481888324802223e-05,
1721
+ "loss": 0.5023190379142761,
1722
+ "step": 240
1723
+ },
1724
+ {
1725
+ "epoch": 0.7030082041932544,
1726
+ "grad_norm": 1.2339262962341309,
1727
+ "learning_rate": 1.644303204056341e-05,
1728
+ "loss": 0.5282092690467834,
1729
+ "step": 241
1730
+ },
1731
+ {
1732
+ "epoch": 0.7059252506836827,
1733
+ "grad_norm": 0.997941255569458,
1734
+ "learning_rate": 1.640400871698277e-05,
1735
+ "loss": 0.5635963082313538,
1736
+ "step": 242
1737
+ },
1738
+ {
1739
+ "epoch": 0.7088422971741112,
1740
+ "grad_norm": 1.0345823764801025,
1741
+ "learning_rate": 1.63648193657626e-05,
1742
+ "loss": 0.5577977895736694,
1743
+ "step": 243
1744
+ },
1745
+ {
1746
+ "epoch": 0.7117593436645396,
1747
+ "grad_norm": 1.3468303680419922,
1748
+ "learning_rate": 1.6325465002909554e-05,
1749
+ "loss": 0.4365362524986267,
1750
+ "step": 244
1751
+ },
1752
+ {
1753
+ "epoch": 0.7146763901549681,
1754
+ "grad_norm": 1.2817128896713257,
1755
+ "learning_rate": 1.628594664870831e-05,
1756
+ "loss": 0.46069926023483276,
1757
+ "step": 245
1758
+ },
1759
+ {
1760
+ "epoch": 0.7175934366453965,
1761
+ "grad_norm": 1.043311357498169,
1762
+ "learning_rate": 1.6246265327695117e-05,
1763
+ "loss": 0.5476971864700317,
1764
+ "step": 246
1765
+ },
1766
+ {
1767
+ "epoch": 0.720510483135825,
1768
+ "grad_norm": 1.0297389030456543,
1769
+ "learning_rate": 1.620642206863124e-05,
1770
+ "loss": 0.48051249980926514,
1771
+ "step": 247
1772
+ },
1773
+ {
1774
+ "epoch": 0.7234275296262535,
1775
+ "grad_norm": 1.4869836568832397,
1776
+ "learning_rate": 1.6166417904476257e-05,
1777
+ "loss": 0.5683314800262451,
1778
+ "step": 248
1779
+ },
1780
+ {
1781
+ "epoch": 0.7263445761166819,
1782
+ "grad_norm": 1.0628005266189575,
1783
+ "learning_rate": 1.6126253872361336e-05,
1784
+ "loss": 0.5277887582778931,
1785
+ "step": 249
1786
+ },
1787
+ {
1788
+ "epoch": 0.7292616226071102,
1789
+ "grad_norm": 1.2682170867919922,
1790
+ "learning_rate": 1.608593101356229e-05,
1791
+ "loss": 0.5048879384994507,
1792
+ "step": 250
1793
+ },
1794
+ {
1795
+ "epoch": 0.7292616226071102,
1796
+ "eval_loss": 0.5038471221923828,
1797
+ "eval_runtime": 1175.0375,
1798
+ "eval_samples_per_second": 0.538,
1799
+ "eval_steps_per_second": 0.538,
1800
+ "step": 250
1801
+ },
1802
+ {
1803
+ "epoch": 0.7321786690975387,
1804
+ "grad_norm": 1.7376199960708618,
1805
+ "learning_rate": 1.6045450373472626e-05,
1806
+ "loss": 0.5093721151351929,
1807
+ "step": 251
1808
+ },
1809
+ {
1810
+ "epoch": 0.7350957155879672,
1811
+ "grad_norm": 1.6047718524932861,
1812
+ "learning_rate": 1.6004813001576405e-05,
1813
+ "loss": 0.4796055555343628,
1814
+ "step": 252
1815
+ },
1816
+ {
1817
+ "epoch": 0.7380127620783956,
1818
+ "grad_norm": 1.3582886457443237,
1819
+ "learning_rate": 1.5964019951421058e-05,
1820
+ "loss": 0.4733014702796936,
1821
+ "step": 253
1822
+ },
1823
+ {
1824
+ "epoch": 0.7409298085688241,
1825
+ "grad_norm": 0.9468897581100464,
1826
+ "learning_rate": 1.5923072280590072e-05,
1827
+ "loss": 0.5312032103538513,
1828
+ "step": 254
1829
+ },
1830
+ {
1831
+ "epoch": 0.7438468550592525,
1832
+ "grad_norm": 1.3890198469161987,
1833
+ "learning_rate": 1.5881971050675547e-05,
1834
+ "loss": 0.47576645016670227,
1835
+ "step": 255
1836
+ },
1837
+ {
1838
+ "epoch": 0.746763901549681,
1839
+ "grad_norm": 1.782992959022522,
1840
+ "learning_rate": 1.584071732725071e-05,
1841
+ "loss": 0.5555092096328735,
1842
+ "step": 256
1843
+ },
1844
+ {
1845
+ "epoch": 0.7496809480401094,
1846
+ "grad_norm": 1.1790621280670166,
1847
+ "learning_rate": 1.5799312179842265e-05,
1848
+ "loss": 0.5148727893829346,
1849
+ "step": 257
1850
+ },
1851
+ {
1852
+ "epoch": 0.7525979945305379,
1853
+ "grad_norm": 1.446694254875183,
1854
+ "learning_rate": 1.5757756681902664e-05,
1855
+ "loss": 0.49939870834350586,
1856
+ "step": 258
1857
+ },
1858
+ {
1859
+ "epoch": 0.7555150410209662,
1860
+ "grad_norm": 1.1786166429519653,
1861
+ "learning_rate": 1.571605191078229e-05,
1862
+ "loss": 0.562156081199646,
1863
+ "step": 259
1864
+ },
1865
+ {
1866
+ "epoch": 0.7584320875113947,
1867
+ "grad_norm": 1.16925847530365,
1868
+ "learning_rate": 1.567419894770151e-05,
1869
+ "loss": 0.49580734968185425,
1870
+ "step": 260
1871
+ },
1872
+ {
1873
+ "epoch": 0.7613491340018231,
1874
+ "grad_norm": 1.60944664478302,
1875
+ "learning_rate": 1.5632198877722676e-05,
1876
+ "loss": 0.4821680784225464,
1877
+ "step": 261
1878
+ },
1879
+ {
1880
+ "epoch": 0.7642661804922516,
1881
+ "grad_norm": 1.3957884311676025,
1882
+ "learning_rate": 1.5590052789721946e-05,
1883
+ "loss": 0.4392276406288147,
1884
+ "step": 262
1885
+ },
1886
+ {
1887
+ "epoch": 0.76718322698268,
1888
+ "grad_norm": 1.636195421218872,
1889
+ "learning_rate": 1.5547761776361096e-05,
1890
+ "loss": 0.39603114128112793,
1891
+ "step": 263
1892
+ },
1893
+ {
1894
+ "epoch": 0.7701002734731085,
1895
+ "grad_norm": 1.496766448020935,
1896
+ "learning_rate": 1.550532693405917e-05,
1897
+ "loss": 0.4833749234676361,
1898
+ "step": 264
1899
+ },
1900
+ {
1901
+ "epoch": 0.773017319963537,
1902
+ "grad_norm": 1.3587844371795654,
1903
+ "learning_rate": 1.5462749362964058e-05,
1904
+ "loss": 0.43738317489624023,
1905
+ "step": 265
1906
+ },
1907
+ {
1908
+ "epoch": 0.7759343664539654,
1909
+ "grad_norm": 1.670704960823059,
1910
+ "learning_rate": 1.5420030166923983e-05,
1911
+ "loss": 0.4476737380027771,
1912
+ "step": 266
1913
+ },
1914
+ {
1915
+ "epoch": 0.7788514129443938,
1916
+ "grad_norm": 1.2674932479858398,
1917
+ "learning_rate": 1.537717045345888e-05,
1918
+ "loss": 0.42266708612442017,
1919
+ "step": 267
1920
+ },
1921
+ {
1922
+ "epoch": 0.7817684594348222,
1923
+ "grad_norm": 2.0639536380767822,
1924
+ "learning_rate": 1.5334171333731666e-05,
1925
+ "loss": 0.5245381593704224,
1926
+ "step": 268
1927
+ },
1928
+ {
1929
+ "epoch": 0.7846855059252507,
1930
+ "grad_norm": 1.2091766595840454,
1931
+ "learning_rate": 1.529103392251946e-05,
1932
+ "loss": 0.5166443586349487,
1933
+ "step": 269
1934
+ },
1935
+ {
1936
+ "epoch": 0.7876025524156791,
1937
+ "grad_norm": 1.1021631956100464,
1938
+ "learning_rate": 1.5247759338184653e-05,
1939
+ "loss": 0.5674265027046204,
1940
+ "step": 270
1941
+ },
1942
+ {
1943
+ "epoch": 0.7905195989061076,
1944
+ "grad_norm": 1.3143829107284546,
1945
+ "learning_rate": 1.520434870264595e-05,
1946
+ "loss": 0.40855613350868225,
1947
+ "step": 271
1948
+ },
1949
+ {
1950
+ "epoch": 0.793436645396536,
1951
+ "grad_norm": 1.1784812211990356,
1952
+ "learning_rate": 1.5160803141349244e-05,
1953
+ "loss": 0.4308925271034241,
1954
+ "step": 272
1955
+ },
1956
+ {
1957
+ "epoch": 0.7963536918869645,
1958
+ "grad_norm": 2.1635706424713135,
1959
+ "learning_rate": 1.5117123783238458e-05,
1960
+ "loss": 0.45035502314567566,
1961
+ "step": 273
1962
+ },
1963
+ {
1964
+ "epoch": 0.7992707383773929,
1965
+ "grad_norm": 1.569203495979309,
1966
+ "learning_rate": 1.5073311760726287e-05,
1967
+ "loss": 0.5095728635787964,
1968
+ "step": 274
1969
+ },
1970
+ {
1971
+ "epoch": 0.8021877848678214,
1972
+ "grad_norm": 2.532621383666992,
1973
+ "learning_rate": 1.5029368209664822e-05,
1974
+ "loss": 0.496748685836792,
1975
+ "step": 275
1976
+ },
1977
+ {
1978
+ "epoch": 0.8051048313582497,
1979
+ "grad_norm": 1.6312552690505981,
1980
+ "learning_rate": 1.4985294269316098e-05,
1981
+ "loss": 0.4972914159297943,
1982
+ "step": 276
1983
+ },
1984
+ {
1985
+ "epoch": 0.8080218778486782,
1986
+ "grad_norm": 1.3996756076812744,
1987
+ "learning_rate": 1.4941091082322579e-05,
1988
+ "loss": 0.5589750409126282,
1989
+ "step": 277
1990
+ },
1991
+ {
1992
+ "epoch": 0.8109389243391066,
1993
+ "grad_norm": 1.1288363933563232,
1994
+ "learning_rate": 1.4896759794677526e-05,
1995
+ "loss": 0.5349453687667847,
1996
+ "step": 278
1997
+ },
1998
+ {
1999
+ "epoch": 0.8138559708295351,
2000
+ "grad_norm": 1.6913920640945435,
2001
+ "learning_rate": 1.4852301555695268e-05,
2002
+ "loss": 0.46511000394821167,
2003
+ "step": 279
2004
+ },
2005
+ {
2006
+ "epoch": 0.8167730173199635,
2007
+ "grad_norm": 1.1913212537765503,
2008
+ "learning_rate": 1.4807717517981439e-05,
2009
+ "loss": 0.4715422987937927,
2010
+ "step": 280
2011
+ },
2012
+ {
2013
+ "epoch": 0.819690063810392,
2014
+ "grad_norm": 1.1179691553115845,
2015
+ "learning_rate": 1.476300883740307e-05,
2016
+ "loss": 0.53330397605896,
2017
+ "step": 281
2018
+ },
2019
+ {
2020
+ "epoch": 0.8226071103008205,
2021
+ "grad_norm": 1.7473797798156738,
2022
+ "learning_rate": 1.4718176673058624e-05,
2023
+ "loss": 0.47564437985420227,
2024
+ "step": 282
2025
+ },
2026
+ {
2027
+ "epoch": 0.8255241567912489,
2028
+ "grad_norm": 1.2653177976608276,
2029
+ "learning_rate": 1.4673222187247963e-05,
2030
+ "loss": 0.46364277601242065,
2031
+ "step": 283
2032
+ },
2033
+ {
2034
+ "epoch": 0.8284412032816773,
2035
+ "grad_norm": 1.2567330598831177,
2036
+ "learning_rate": 1.4628146545442202e-05,
2037
+ "loss": 0.4778091013431549,
2038
+ "step": 284
2039
+ },
2040
+ {
2041
+ "epoch": 0.8313582497721057,
2042
+ "grad_norm": 1.5848406553268433,
2043
+ "learning_rate": 1.4582950916253488e-05,
2044
+ "loss": 0.4480203688144684,
2045
+ "step": 285
2046
+ },
2047
+ {
2048
+ "epoch": 0.8342752962625342,
2049
+ "grad_norm": 1.3278183937072754,
2050
+ "learning_rate": 1.453763647140472e-05,
2051
+ "loss": 0.37945032119750977,
2052
+ "step": 286
2053
+ },
2054
+ {
2055
+ "epoch": 0.8371923427529626,
2056
+ "grad_norm": 1.0961651802062988,
2057
+ "learning_rate": 1.4492204385699155e-05,
2058
+ "loss": 0.5306747555732727,
2059
+ "step": 287
2060
+ },
2061
+ {
2062
+ "epoch": 0.8401093892433911,
2063
+ "grad_norm": 1.176276683807373,
2064
+ "learning_rate": 1.4446655836989961e-05,
2065
+ "loss": 0.49950045347213745,
2066
+ "step": 288
2067
+ },
2068
+ {
2069
+ "epoch": 0.8430264357338195,
2070
+ "grad_norm": 1.2228269577026367,
2071
+ "learning_rate": 1.4400992006149674e-05,
2072
+ "loss": 0.494475394487381,
2073
+ "step": 289
2074
+ },
2075
+ {
2076
+ "epoch": 0.845943482224248,
2077
+ "grad_norm": 1.1584209203720093,
2078
+ "learning_rate": 1.4355214077039592e-05,
2079
+ "loss": 0.44170859456062317,
2080
+ "step": 290
2081
+ },
2082
+ {
2083
+ "epoch": 0.8488605287146764,
2084
+ "grad_norm": 1.2041938304901123,
2085
+ "learning_rate": 1.4309323236479071e-05,
2086
+ "loss": 0.4359871745109558,
2087
+ "step": 291
2088
+ },
2089
+ {
2090
+ "epoch": 0.8517775752051049,
2091
+ "grad_norm": 1.279645562171936,
2092
+ "learning_rate": 1.4263320674214762e-05,
2093
+ "loss": 0.45031386613845825,
2094
+ "step": 292
2095
+ },
2096
+ {
2097
+ "epoch": 0.8546946216955332,
2098
+ "grad_norm": 1.3958357572555542,
2099
+ "learning_rate": 1.4217207582889769e-05,
2100
+ "loss": 0.4832204580307007,
2101
+ "step": 293
2102
+ },
2103
+ {
2104
+ "epoch": 0.8576116681859617,
2105
+ "grad_norm": 1.2788586616516113,
2106
+ "learning_rate": 1.4170985158012725e-05,
2107
+ "loss": 0.5154346227645874,
2108
+ "step": 294
2109
+ },
2110
+ {
2111
+ "epoch": 0.8605287146763901,
2112
+ "grad_norm": 1.3634892702102661,
2113
+ "learning_rate": 1.4124654597926795e-05,
2114
+ "loss": 0.46777206659317017,
2115
+ "step": 295
2116
+ },
2117
+ {
2118
+ "epoch": 0.8634457611668186,
2119
+ "grad_norm": 1.2719579935073853,
2120
+ "learning_rate": 1.4078217103778619e-05,
2121
+ "loss": 0.4247053265571594,
2122
+ "step": 296
2123
+ },
2124
+ {
2125
+ "epoch": 0.866362807657247,
2126
+ "grad_norm": 2.890467643737793,
2127
+ "learning_rate": 1.4031673879487161e-05,
2128
+ "loss": 0.38349640369415283,
2129
+ "step": 297
2130
+ },
2131
+ {
2132
+ "epoch": 0.8692798541476755,
2133
+ "grad_norm": 2.4354801177978516,
2134
+ "learning_rate": 1.3985026131712499e-05,
2135
+ "loss": 0.4134889543056488,
2136
+ "step": 298
2137
+ },
2138
+ {
2139
+ "epoch": 0.872196900638104,
2140
+ "grad_norm": 1.0138323307037354,
2141
+ "learning_rate": 1.3938275069824541e-05,
2142
+ "loss": 0.5176680684089661,
2143
+ "step": 299
2144
+ },
2145
+ {
2146
+ "epoch": 0.8751139471285324,
2147
+ "grad_norm": 1.2316186428070068,
2148
+ "learning_rate": 1.389142190587168e-05,
2149
+ "loss": 0.4818477928638458,
2150
+ "step": 300
2151
+ },
2152
+ {
2153
+ "epoch": 0.8751139471285324,
2154
+ "eval_loss": 0.4752846360206604,
2155
+ "eval_runtime": 1189.1666,
2156
+ "eval_samples_per_second": 0.531,
2157
+ "eval_steps_per_second": 0.531,
2158
+ "step": 300
2159
+ },
2160
+ {
2161
+ "epoch": 0.8780309936189608,
2162
+ "grad_norm": 1.515487551689148,
2163
+ "learning_rate": 1.384446785454936e-05,
2164
+ "loss": 0.47766175866127014,
2165
+ "step": 301
2166
+ },
2167
+ {
2168
+ "epoch": 0.8809480401093892,
2169
+ "grad_norm": 1.4357497692108154,
2170
+ "learning_rate": 1.3797414133168591e-05,
2171
+ "loss": 0.49297061562538147,
2172
+ "step": 302
2173
+ },
2174
+ {
2175
+ "epoch": 0.8838650865998177,
2176
+ "grad_norm": 1.2523037195205688,
2177
+ "learning_rate": 1.3750261961624383e-05,
2178
+ "loss": 0.4629015326499939,
2179
+ "step": 303
2180
+ },
2181
+ {
2182
+ "epoch": 0.8867821330902461,
2183
+ "grad_norm": 3.5790023803710938,
2184
+ "learning_rate": 1.3703012562364124e-05,
2185
+ "loss": 0.3773120045661926,
2186
+ "step": 304
2187
+ },
2188
+ {
2189
+ "epoch": 0.8896991795806746,
2190
+ "grad_norm": 1.9305704832077026,
2191
+ "learning_rate": 1.3655667160355892e-05,
2192
+ "loss": 0.496719628572464,
2193
+ "step": 305
2194
+ },
2195
+ {
2196
+ "epoch": 0.892616226071103,
2197
+ "grad_norm": 1.1506154537200928,
2198
+ "learning_rate": 1.3608226983056687e-05,
2199
+ "loss": 0.49487072229385376,
2200
+ "step": 306
2201
+ },
2202
+ {
2203
+ "epoch": 0.8955332725615315,
2204
+ "grad_norm": 1.8046090602874756,
2205
+ "learning_rate": 1.3560693260380614e-05,
2206
+ "loss": 0.4910697937011719,
2207
+ "step": 307
2208
+ },
2209
+ {
2210
+ "epoch": 0.8984503190519599,
2211
+ "grad_norm": 2.0088653564453125,
2212
+ "learning_rate": 1.3513067224667e-05,
2213
+ "loss": 0.508246660232544,
2214
+ "step": 308
2215
+ },
2216
+ {
2217
+ "epoch": 0.9013673655423883,
2218
+ "grad_norm": 1.2966033220291138,
2219
+ "learning_rate": 1.3465350110648437e-05,
2220
+ "loss": 0.5125166177749634,
2221
+ "step": 309
2222
+ },
2223
+ {
2224
+ "epoch": 0.9042844120328167,
2225
+ "grad_norm": 1.9976309537887573,
2226
+ "learning_rate": 1.3417543155418775e-05,
2227
+ "loss": 0.43942537903785706,
2228
+ "step": 310
2229
+ },
2230
+ {
2231
+ "epoch": 0.9072014585232452,
2232
+ "grad_norm": 1.2663682699203491,
2233
+ "learning_rate": 1.336964759840105e-05,
2234
+ "loss": 0.4839101731777191,
2235
+ "step": 311
2236
+ },
2237
+ {
2238
+ "epoch": 0.9101185050136736,
2239
+ "grad_norm": 1.1223328113555908,
2240
+ "learning_rate": 1.3321664681315354e-05,
2241
+ "loss": 0.48008066415786743,
2242
+ "step": 312
2243
+ },
2244
+ {
2245
+ "epoch": 0.9130355515041021,
2246
+ "grad_norm": 1.5786972045898438,
2247
+ "learning_rate": 1.3273595648146634e-05,
2248
+ "loss": 0.47250309586524963,
2249
+ "step": 313
2250
+ },
2251
+ {
2252
+ "epoch": 0.9159525979945305,
2253
+ "grad_norm": 1.2150241136550903,
2254
+ "learning_rate": 1.322544174511245e-05,
2255
+ "loss": 0.5149738788604736,
2256
+ "step": 314
2257
+ },
2258
+ {
2259
+ "epoch": 0.918869644484959,
2260
+ "grad_norm": 1.3676542043685913,
2261
+ "learning_rate": 1.3177204220630662e-05,
2262
+ "loss": 0.4430195093154907,
2263
+ "step": 315
2264
+ },
2265
+ {
2266
+ "epoch": 0.9217866909753875,
2267
+ "grad_norm": 1.0703285932540894,
2268
+ "learning_rate": 1.3128884325287064e-05,
2269
+ "loss": 0.4798983037471771,
2270
+ "step": 316
2271
+ },
2272
+ {
2273
+ "epoch": 0.9247037374658159,
2274
+ "grad_norm": 1.3131535053253174,
2275
+ "learning_rate": 1.308048331180296e-05,
2276
+ "loss": 0.4241073727607727,
2277
+ "step": 317
2278
+ },
2279
+ {
2280
+ "epoch": 0.9276207839562443,
2281
+ "grad_norm": 1.4485348463058472,
2282
+ "learning_rate": 1.3032002435002698e-05,
2283
+ "loss": 0.527199923992157,
2284
+ "step": 318
2285
+ },
2286
+ {
2287
+ "epoch": 0.9305378304466727,
2288
+ "grad_norm": 1.370936393737793,
2289
+ "learning_rate": 1.2983442951781114e-05,
2290
+ "loss": 0.47125962376594543,
2291
+ "step": 319
2292
+ },
2293
+ {
2294
+ "epoch": 0.9334548769371012,
2295
+ "grad_norm": 1.2369643449783325,
2296
+ "learning_rate": 1.2934806121070973e-05,
2297
+ "loss": 0.4814244210720062,
2298
+ "step": 320
2299
+ },
2300
+ {
2301
+ "epoch": 0.9363719234275296,
2302
+ "grad_norm": 1.2632933855056763,
2303
+ "learning_rate": 1.2886093203810314e-05,
2304
+ "loss": 0.4915548264980316,
2305
+ "step": 321
2306
+ },
2307
+ {
2308
+ "epoch": 0.9392889699179581,
2309
+ "grad_norm": 1.054569959640503,
2310
+ "learning_rate": 1.2837305462909764e-05,
2311
+ "loss": 0.5325602293014526,
2312
+ "step": 322
2313
+ },
2314
+ {
2315
+ "epoch": 0.9422060164083865,
2316
+ "grad_norm": 1.15959632396698,
2317
+ "learning_rate": 1.27884441632198e-05,
2318
+ "loss": 0.43607404828071594,
2319
+ "step": 323
2320
+ },
2321
+ {
2322
+ "epoch": 0.945123062898815,
2323
+ "grad_norm": 1.1667979955673218,
2324
+ "learning_rate": 1.2739510571497945e-05,
2325
+ "loss": 0.4631507992744446,
2326
+ "step": 324
2327
+ },
2328
+ {
2329
+ "epoch": 0.9480401093892434,
2330
+ "grad_norm": 1.6009081602096558,
2331
+ "learning_rate": 1.2690505956375944e-05,
2332
+ "loss": 0.4935731887817383,
2333
+ "step": 325
2334
+ },
2335
+ {
2336
+ "epoch": 0.9509571558796718,
2337
+ "grad_norm": 1.1193996667861938,
2338
+ "learning_rate": 1.2641431588326858e-05,
2339
+ "loss": 0.45883435010910034,
2340
+ "step": 326
2341
+ },
2342
+ {
2343
+ "epoch": 0.9538742023701002,
2344
+ "grad_norm": 1.5365067720413208,
2345
+ "learning_rate": 1.2592288739632138e-05,
2346
+ "loss": 0.5206276178359985,
2347
+ "step": 327
2348
+ },
2349
+ {
2350
+ "epoch": 0.9567912488605287,
2351
+ "grad_norm": 1.0714622735977173,
2352
+ "learning_rate": 1.2543078684348632e-05,
2353
+ "loss": 0.5242853760719299,
2354
+ "step": 328
2355
+ },
2356
+ {
2357
+ "epoch": 0.9597082953509571,
2358
+ "grad_norm": 1.3009248971939087,
2359
+ "learning_rate": 1.2493802698275557e-05,
2360
+ "loss": 0.4794357717037201,
2361
+ "step": 329
2362
+ },
2363
+ {
2364
+ "epoch": 0.9626253418413856,
2365
+ "grad_norm": 1.495771050453186,
2366
+ "learning_rate": 1.244446205892143e-05,
2367
+ "loss": 0.5849282145500183,
2368
+ "step": 330
2369
+ },
2370
+ {
2371
+ "epoch": 0.965542388331814,
2372
+ "grad_norm": 1.2046003341674805,
2373
+ "learning_rate": 1.2395058045470935e-05,
2374
+ "loss": 0.47758305072784424,
2375
+ "step": 331
2376
+ },
2377
+ {
2378
+ "epoch": 0.9684594348222425,
2379
+ "grad_norm": 1.1362569332122803,
2380
+ "learning_rate": 1.2345591938751772e-05,
2381
+ "loss": 0.4490663409233093,
2382
+ "step": 332
2383
+ },
2384
+ {
2385
+ "epoch": 0.971376481312671,
2386
+ "grad_norm": 1.2658129930496216,
2387
+ "learning_rate": 1.2296065021201438e-05,
2388
+ "loss": 0.4035309851169586,
2389
+ "step": 333
2390
+ },
2391
+ {
2392
+ "epoch": 0.9742935278030994,
2393
+ "grad_norm": 4.370306015014648,
2394
+ "learning_rate": 1.2246478576833993e-05,
2395
+ "loss": 0.495273619890213,
2396
+ "step": 334
2397
+ },
2398
+ {
2399
+ "epoch": 0.9772105742935278,
2400
+ "grad_norm": 1.3863654136657715,
2401
+ "learning_rate": 1.219683389120676e-05,
2402
+ "loss": 0.46410733461380005,
2403
+ "step": 335
2404
+ },
2405
+ {
2406
+ "epoch": 0.9801276207839562,
2407
+ "grad_norm": 1.4544321298599243,
2408
+ "learning_rate": 1.2147132251387004e-05,
2409
+ "loss": 0.4301709830760956,
2410
+ "step": 336
2411
+ },
2412
+ {
2413
+ "epoch": 0.9830446672743847,
2414
+ "grad_norm": 1.0852457284927368,
2415
+ "learning_rate": 1.2097374945918554e-05,
2416
+ "loss": 0.48892468214035034,
2417
+ "step": 337
2418
+ },
2419
+ {
2420
+ "epoch": 0.9859617137648131,
2421
+ "grad_norm": 1.5062257051467896,
2422
+ "learning_rate": 1.2047563264788412e-05,
2423
+ "loss": 0.4667983055114746,
2424
+ "step": 338
2425
+ },
2426
+ {
2427
+ "epoch": 0.9888787602552416,
2428
+ "grad_norm": 1.2472951412200928,
2429
+ "learning_rate": 1.199769849939329e-05,
2430
+ "loss": 0.4827345013618469,
2431
+ "step": 339
2432
+ },
2433
+ {
2434
+ "epoch": 0.99179580674567,
2435
+ "grad_norm": 1.2589871883392334,
2436
+ "learning_rate": 1.1947781942506151e-05,
2437
+ "loss": 0.405245304107666,
2438
+ "step": 340
2439
+ },
2440
+ {
2441
+ "epoch": 0.9947128532360985,
2442
+ "grad_norm": 1.25636625289917,
2443
+ "learning_rate": 1.1897814888242679e-05,
2444
+ "loss": 0.37956133484840393,
2445
+ "step": 341
2446
+ },
2447
+ {
2448
+ "epoch": 0.9976298997265269,
2449
+ "grad_norm": 2.7064895629882812,
2450
+ "learning_rate": 1.1847798632027726e-05,
2451
+ "loss": 0.489456444978714,
2452
+ "step": 342
2453
+ },
2454
+ {
2455
+ "epoch": 1.0,
2456
+ "grad_norm": 1.6156240701675415,
2457
+ "learning_rate": 1.1797734470561744e-05,
2458
+ "loss": 0.46473199129104614,
2459
+ "step": 343
2460
+ },
2461
+ {
2462
+ "epoch": 1.0029170464904285,
2463
+ "grad_norm": 1.3046343326568604,
2464
+ "learning_rate": 1.1747623701787143e-05,
2465
+ "loss": 0.3504878282546997,
2466
+ "step": 344
2467
+ },
2468
+ {
2469
+ "epoch": 1.005834092980857,
2470
+ "grad_norm": 1.414828896522522,
2471
+ "learning_rate": 1.1697467624854666e-05,
2472
+ "loss": 0.4719260334968567,
2473
+ "step": 345
2474
+ },
2475
+ {
2476
+ "epoch": 1.0087511394712854,
2477
+ "grad_norm": 1.1873356103897095,
2478
+ "learning_rate": 1.164726754008969e-05,
2479
+ "loss": 0.45313555002212524,
2480
+ "step": 346
2481
+ },
2482
+ {
2483
+ "epoch": 1.0116681859617138,
2484
+ "grad_norm": 1.1382380723953247,
2485
+ "learning_rate": 1.1597024748958526e-05,
2486
+ "loss": 0.4365478456020355,
2487
+ "step": 347
2488
+ },
2489
+ {
2490
+ "epoch": 1.0145852324521423,
2491
+ "grad_norm": 1.8141961097717285,
2492
+ "learning_rate": 1.1546740554034661e-05,
2493
+ "loss": 0.3694503605365753,
2494
+ "step": 348
2495
+ },
2496
+ {
2497
+ "epoch": 1.0175022789425707,
2498
+ "grad_norm": 1.333388328552246,
2499
+ "learning_rate": 1.1496416258965015e-05,
2500
+ "loss": 0.4755721688270569,
2501
+ "step": 349
2502
+ },
2503
+ {
2504
+ "epoch": 1.0204193254329992,
2505
+ "grad_norm": 1.3464443683624268,
2506
+ "learning_rate": 1.1446053168436117e-05,
2507
+ "loss": 0.4227846562862396,
2508
+ "step": 350
2509
+ },
2510
+ {
2511
+ "epoch": 1.0204193254329992,
2512
+ "eval_loss": 0.44924086332321167,
2513
+ "eval_runtime": 1214.6648,
2514
+ "eval_samples_per_second": 0.52,
2515
+ "eval_steps_per_second": 0.52,
2516
+ "step": 350
2517
+ },
2518
+ {
2519
+ "epoch": 1.0233363719234276,
2520
+ "grad_norm": 1.2682689428329468,
2521
+ "learning_rate": 1.1395652588140292e-05,
2522
+ "loss": 0.44300130009651184,
2523
+ "step": 351
2524
+ },
2525
+ {
2526
+ "epoch": 1.0262534184138559,
2527
+ "grad_norm": 1.7737696170806885,
2528
+ "learning_rate": 1.1345215824741814e-05,
2529
+ "loss": 0.5106258988380432,
2530
+ "step": 352
2531
+ },
2532
+ {
2533
+ "epoch": 1.0291704649042843,
2534
+ "grad_norm": 1.2601238489151,
2535
+ "learning_rate": 1.1294744185843014e-05,
2536
+ "loss": 0.45930635929107666,
2537
+ "step": 353
2538
+ },
2539
+ {
2540
+ "epoch": 1.0320875113947128,
2541
+ "grad_norm": 1.2162678241729736,
2542
+ "learning_rate": 1.1244238979950406e-05,
2543
+ "loss": 0.44163084030151367,
2544
+ "step": 354
2545
+ },
2546
+ {
2547
+ "epoch": 1.0350045578851412,
2548
+ "grad_norm": 1.0905817747116089,
2549
+ "learning_rate": 1.1193701516440733e-05,
2550
+ "loss": 0.510662317276001,
2551
+ "step": 355
2552
+ },
2553
+ {
2554
+ "epoch": 1.0379216043755697,
2555
+ "grad_norm": 0.9624952673912048,
2556
+ "learning_rate": 1.1143133105527048e-05,
2557
+ "loss": 0.5297917127609253,
2558
+ "step": 356
2559
+ },
2560
+ {
2561
+ "epoch": 1.0408386508659981,
2562
+ "grad_norm": 1.2757681608200073,
2563
+ "learning_rate": 1.1092535058224725e-05,
2564
+ "loss": 0.4332093596458435,
2565
+ "step": 357
2566
+ },
2567
+ {
2568
+ "epoch": 1.0437556973564266,
2569
+ "grad_norm": 1.6885719299316406,
2570
+ "learning_rate": 1.104190868631748e-05,
2571
+ "loss": 0.4337635040283203,
2572
+ "step": 358
2573
+ },
2574
+ {
2575
+ "epoch": 1.046672743846855,
2576
+ "grad_norm": 1.175484538078308,
2577
+ "learning_rate": 1.099125530232336e-05,
2578
+ "loss": 0.45411020517349243,
2579
+ "step": 359
2580
+ },
2581
+ {
2582
+ "epoch": 1.0495897903372835,
2583
+ "grad_norm": 1.0964939594268799,
2584
+ "learning_rate": 1.0940576219460723e-05,
2585
+ "loss": 0.5333439707756042,
2586
+ "step": 360
2587
+ },
2588
+ {
2589
+ "epoch": 1.052506836827712,
2590
+ "grad_norm": 1.5493136644363403,
2591
+ "learning_rate": 1.0889872751614176e-05,
2592
+ "loss": 0.4400906264781952,
2593
+ "step": 361
2594
+ },
2595
+ {
2596
+ "epoch": 1.0554238833181404,
2597
+ "grad_norm": 1.2491416931152344,
2598
+ "learning_rate": 1.0839146213300526e-05,
2599
+ "loss": 0.31049978733062744,
2600
+ "step": 362
2601
+ },
2602
+ {
2603
+ "epoch": 1.0583409298085689,
2604
+ "grad_norm": 1.7213693857192993,
2605
+ "learning_rate": 1.0788397919634694e-05,
2606
+ "loss": 0.389009028673172,
2607
+ "step": 363
2608
+ },
2609
+ {
2610
+ "epoch": 1.0612579762989973,
2611
+ "grad_norm": 1.5405336618423462,
2612
+ "learning_rate": 1.0737629186295621e-05,
2613
+ "loss": 0.4068562984466553,
2614
+ "step": 364
2615
+ },
2616
+ {
2617
+ "epoch": 1.0641750227894258,
2618
+ "grad_norm": 1.225455641746521,
2619
+ "learning_rate": 1.0686841329492159e-05,
2620
+ "loss": 0.47358617186546326,
2621
+ "step": 365
2622
+ },
2623
+ {
2624
+ "epoch": 1.0670920692798542,
2625
+ "grad_norm": 1.3436250686645508,
2626
+ "learning_rate": 1.0636035665928945e-05,
2627
+ "loss": 0.47050854563713074,
2628
+ "step": 366
2629
+ },
2630
+ {
2631
+ "epoch": 1.0700091157702827,
2632
+ "grad_norm": 1.4952112436294556,
2633
+ "learning_rate": 1.058521351277227e-05,
2634
+ "loss": 0.43496906757354736,
2635
+ "step": 367
2636
+ },
2637
+ {
2638
+ "epoch": 1.072926162260711,
2639
+ "grad_norm": 1.549112319946289,
2640
+ "learning_rate": 1.0534376187615924e-05,
2641
+ "loss": 0.45711052417755127,
2642
+ "step": 368
2643
+ },
2644
+ {
2645
+ "epoch": 1.0758432087511394,
2646
+ "grad_norm": 1.3851526975631714,
2647
+ "learning_rate": 1.048352500844704e-05,
2648
+ "loss": 0.45045915246009827,
2649
+ "step": 369
2650
+ },
2651
+ {
2652
+ "epoch": 1.0787602552415678,
2653
+ "grad_norm": 1.6302049160003662,
2654
+ "learning_rate": 1.0432661293611927e-05,
2655
+ "loss": 0.3736046254634857,
2656
+ "step": 370
2657
+ },
2658
+ {
2659
+ "epoch": 1.0816773017319963,
2660
+ "grad_norm": 1.3365869522094727,
2661
+ "learning_rate": 1.0381786361781885e-05,
2662
+ "loss": 0.42242100834846497,
2663
+ "step": 371
2664
+ },
2665
+ {
2666
+ "epoch": 1.0845943482224247,
2667
+ "grad_norm": 1.4369138479232788,
2668
+ "learning_rate": 1.0330901531919026e-05,
2669
+ "loss": 0.44570961594581604,
2670
+ "step": 372
2671
+ },
2672
+ {
2673
+ "epoch": 1.0875113947128532,
2674
+ "grad_norm": 1.3528283834457397,
2675
+ "learning_rate": 1.0280008123242069e-05,
2676
+ "loss": 0.43440738320350647,
2677
+ "step": 373
2678
+ },
2679
+ {
2680
+ "epoch": 1.0904284412032816,
2681
+ "grad_norm": 1.469660997390747,
2682
+ "learning_rate": 1.0229107455192147e-05,
2683
+ "loss": 0.3960394263267517,
2684
+ "step": 374
2685
+ },
2686
+ {
2687
+ "epoch": 1.09334548769371,
2688
+ "grad_norm": 1.4542185068130493,
2689
+ "learning_rate": 1.0178200847398595e-05,
2690
+ "loss": 0.47834208607673645,
2691
+ "step": 375
2692
+ },
2693
+ {
2694
+ "epoch": 1.0962625341841385,
2695
+ "grad_norm": 1.6470292806625366,
2696
+ "learning_rate": 1.0127289619644737e-05,
2697
+ "loss": 0.42791086435317993,
2698
+ "step": 376
2699
+ },
2700
+ {
2701
+ "epoch": 1.099179580674567,
2702
+ "grad_norm": 1.1934021711349487,
2703
+ "learning_rate": 1.0076375091833681e-05,
2704
+ "loss": 0.4401305019855499,
2705
+ "step": 377
2706
+ },
2707
+ {
2708
+ "epoch": 1.1020966271649955,
2709
+ "grad_norm": 0.9786668419837952,
2710
+ "learning_rate": 1.0025458583954078e-05,
2711
+ "loss": 0.4816555678844452,
2712
+ "step": 378
2713
+ },
2714
+ {
2715
+ "epoch": 1.105013673655424,
2716
+ "grad_norm": 1.1348779201507568,
2717
+ "learning_rate": 9.974541416045924e-06,
2718
+ "loss": 0.41516968607902527,
2719
+ "step": 379
2720
+ },
2721
+ {
2722
+ "epoch": 1.1079307201458524,
2723
+ "grad_norm": 1.0188615322113037,
2724
+ "learning_rate": 9.923624908166322e-06,
2725
+ "loss": 0.48087278008461,
2726
+ "step": 380
2727
+ },
2728
+ {
2729
+ "epoch": 1.1108477666362808,
2730
+ "grad_norm": 1.0821740627288818,
2731
+ "learning_rate": 9.872710380355263e-06,
2732
+ "loss": 0.41974008083343506,
2733
+ "step": 381
2734
+ },
2735
+ {
2736
+ "epoch": 1.1137648131267093,
2737
+ "grad_norm": 1.250951886177063,
2738
+ "learning_rate": 9.82179915260141e-06,
2739
+ "loss": 0.42703643441200256,
2740
+ "step": 382
2741
+ },
2742
+ {
2743
+ "epoch": 1.1166818596171377,
2744
+ "grad_norm": 1.4528254270553589,
2745
+ "learning_rate": 9.770892544807856e-06,
2746
+ "loss": 0.43801453709602356,
2747
+ "step": 383
2748
+ },
2749
+ {
2750
+ "epoch": 1.1195989061075662,
2751
+ "grad_norm": 1.813859462738037,
2752
+ "learning_rate": 9.719991876757934e-06,
2753
+ "loss": 0.4344240725040436,
2754
+ "step": 384
2755
+ },
2756
+ {
2757
+ "epoch": 1.1225159525979946,
2758
+ "grad_norm": 1.6681253910064697,
2759
+ "learning_rate": 9.669098468080976e-06,
2760
+ "loss": 0.4356998801231384,
2761
+ "step": 385
2762
+ },
2763
+ {
2764
+ "epoch": 1.125432999088423,
2765
+ "grad_norm": 1.3447953462600708,
2766
+ "learning_rate": 9.618213638218117e-06,
2767
+ "loss": 0.43189188838005066,
2768
+ "step": 386
2769
+ },
2770
+ {
2771
+ "epoch": 1.1283500455788513,
2772
+ "grad_norm": 1.9577926397323608,
2773
+ "learning_rate": 9.567338706388074e-06,
2774
+ "loss": 0.34984707832336426,
2775
+ "step": 387
2776
+ },
2777
+ {
2778
+ "epoch": 1.1312670920692798,
2779
+ "grad_norm": 1.5225576162338257,
2780
+ "learning_rate": 9.516474991552965e-06,
2781
+ "loss": 0.4243963062763214,
2782
+ "step": 388
2783
+ },
2784
+ {
2785
+ "epoch": 1.1341841385597082,
2786
+ "grad_norm": 1.7416809797286987,
2787
+ "learning_rate": 9.46562381238408e-06,
2788
+ "loss": 0.3414606750011444,
2789
+ "step": 389
2790
+ },
2791
+ {
2792
+ "epoch": 1.1371011850501367,
2793
+ "grad_norm": 1.8358951807022095,
2794
+ "learning_rate": 9.414786487227732e-06,
2795
+ "loss": 0.387447327375412,
2796
+ "step": 390
2797
+ },
2798
+ {
2799
+ "epoch": 1.1400182315405651,
2800
+ "grad_norm": 1.9706153869628906,
2801
+ "learning_rate": 9.363964334071057e-06,
2802
+ "loss": 0.4599088728427887,
2803
+ "step": 391
2804
+ },
2805
+ {
2806
+ "epoch": 1.1429352780309936,
2807
+ "grad_norm": 1.0604286193847656,
2808
+ "learning_rate": 9.313158670507843e-06,
2809
+ "loss": 0.4633581042289734,
2810
+ "step": 392
2811
+ },
2812
+ {
2813
+ "epoch": 1.145852324521422,
2814
+ "grad_norm": 1.4851202964782715,
2815
+ "learning_rate": 9.262370813704379e-06,
2816
+ "loss": 0.3872259557247162,
2817
+ "step": 393
2818
+ },
2819
+ {
2820
+ "epoch": 1.1487693710118505,
2821
+ "grad_norm": 1.7839159965515137,
2822
+ "learning_rate": 9.21160208036531e-06,
2823
+ "loss": 0.5215944647789001,
2824
+ "step": 394
2825
+ },
2826
+ {
2827
+ "epoch": 1.151686417502279,
2828
+ "grad_norm": 1.3054656982421875,
2829
+ "learning_rate": 9.160853786699475e-06,
2830
+ "loss": 0.4030425548553467,
2831
+ "step": 395
2832
+ },
2833
+ {
2834
+ "epoch": 1.1546034639927074,
2835
+ "grad_norm": 3.8467981815338135,
2836
+ "learning_rate": 9.110127248385827e-06,
2837
+ "loss": 0.4032524824142456,
2838
+ "step": 396
2839
+ },
2840
+ {
2841
+ "epoch": 1.1575205104831359,
2842
+ "grad_norm": 1.8513801097869873,
2843
+ "learning_rate": 9.05942378053928e-06,
2844
+ "loss": 0.46577155590057373,
2845
+ "step": 397
2846
+ },
2847
+ {
2848
+ "epoch": 1.1604375569735643,
2849
+ "grad_norm": 1.312689185142517,
2850
+ "learning_rate": 9.008744697676642e-06,
2851
+ "loss": 0.39114487171173096,
2852
+ "step": 398
2853
+ },
2854
+ {
2855
+ "epoch": 1.1633546034639928,
2856
+ "grad_norm": 1.1996328830718994,
2857
+ "learning_rate": 8.958091313682521e-06,
2858
+ "loss": 0.481199711561203,
2859
+ "step": 399
2860
+ },
2861
+ {
2862
+ "epoch": 1.1662716499544212,
2863
+ "grad_norm": 5.172409534454346,
2864
+ "learning_rate": 8.90746494177528e-06,
2865
+ "loss": 0.3803558945655823,
2866
+ "step": 400
2867
+ },
2868
+ {
2869
+ "epoch": 1.1662716499544212,
2870
+ "eval_loss": 0.4318464398384094,
2871
+ "eval_runtime": 1206.0306,
2872
+ "eval_samples_per_second": 0.524,
2873
+ "eval_steps_per_second": 0.524,
2874
+ "step": 400
2875
+ },
2876
+ {
2877
+ "epoch": 1.1691886964448497,
2878
+ "grad_norm": 1.0115015506744385,
2879
+ "learning_rate": 8.856866894472954e-06,
2880
+ "loss": 0.39636704325675964,
2881
+ "step": 401
2882
+ },
2883
+ {
2884
+ "epoch": 1.172105742935278,
2885
+ "grad_norm": 1.1557435989379883,
2886
+ "learning_rate": 8.806298483559268e-06,
2887
+ "loss": 0.4076298475265503,
2888
+ "step": 402
2889
+ },
2890
+ {
2891
+ "epoch": 1.1750227894257064,
2892
+ "grad_norm": 1.2802515029907227,
2893
+ "learning_rate": 8.755761020049597e-06,
2894
+ "loss": 0.44352248311042786,
2895
+ "step": 403
2896
+ },
2897
+ {
2898
+ "epoch": 1.1779398359161348,
2899
+ "grad_norm": 1.2755069732666016,
2900
+ "learning_rate": 8.705255814156988e-06,
2901
+ "loss": 0.390497624874115,
2902
+ "step": 404
2903
+ },
2904
+ {
2905
+ "epoch": 1.1808568824065633,
2906
+ "grad_norm": 1.2799782752990723,
2907
+ "learning_rate": 8.654784175258188e-06,
2908
+ "loss": 0.35810694098472595,
2909
+ "step": 405
2910
+ },
2911
+ {
2912
+ "epoch": 1.1837739288969917,
2913
+ "grad_norm": 1.0968674421310425,
2914
+ "learning_rate": 8.604347411859713e-06,
2915
+ "loss": 0.3890265226364136,
2916
+ "step": 406
2917
+ },
2918
+ {
2919
+ "epoch": 1.1866909753874202,
2920
+ "grad_norm": 1.3334455490112305,
2921
+ "learning_rate": 8.553946831563886e-06,
2922
+ "loss": 0.3916901648044586,
2923
+ "step": 407
2924
+ },
2925
+ {
2926
+ "epoch": 1.1896080218778486,
2927
+ "grad_norm": 1.1888184547424316,
2928
+ "learning_rate": 8.503583741034988e-06,
2929
+ "loss": 0.5231326222419739,
2930
+ "step": 408
2931
+ },
2932
+ {
2933
+ "epoch": 1.192525068368277,
2934
+ "grad_norm": 1.1163763999938965,
2935
+ "learning_rate": 8.45325944596534e-06,
2936
+ "loss": 0.4249858558177948,
2937
+ "step": 409
2938
+ },
2939
+ {
2940
+ "epoch": 1.1954421148587056,
2941
+ "grad_norm": 1.3470333814620972,
2942
+ "learning_rate": 8.40297525104148e-06,
2943
+ "loss": 0.5201632380485535,
2944
+ "step": 410
2945
+ },
2946
+ {
2947
+ "epoch": 1.198359161349134,
2948
+ "grad_norm": 1.5412285327911377,
2949
+ "learning_rate": 8.35273245991031e-06,
2950
+ "loss": 0.39376699924468994,
2951
+ "step": 411
2952
+ },
2953
+ {
2954
+ "epoch": 1.2012762078395625,
2955
+ "grad_norm": 1.3408735990524292,
2956
+ "learning_rate": 8.302532375145339e-06,
2957
+ "loss": 0.39554283022880554,
2958
+ "step": 412
2959
+ },
2960
+ {
2961
+ "epoch": 1.204193254329991,
2962
+ "grad_norm": 1.990668773651123,
2963
+ "learning_rate": 8.25237629821286e-06,
2964
+ "loss": 0.42424261569976807,
2965
+ "step": 413
2966
+ },
2967
+ {
2968
+ "epoch": 1.2071103008204194,
2969
+ "grad_norm": 1.6471989154815674,
2970
+ "learning_rate": 8.202265529438259e-06,
2971
+ "loss": 0.3234582543373108,
2972
+ "step": 414
2973
+ },
2974
+ {
2975
+ "epoch": 1.2100273473108478,
2976
+ "grad_norm": 1.1483631134033203,
2977
+ "learning_rate": 8.152201367972275e-06,
2978
+ "loss": 0.39163246750831604,
2979
+ "step": 415
2980
+ },
2981
+ {
2982
+ "epoch": 1.2129443938012763,
2983
+ "grad_norm": 1.800149917602539,
2984
+ "learning_rate": 8.102185111757323e-06,
2985
+ "loss": 0.5055042505264282,
2986
+ "step": 416
2987
+ },
2988
+ {
2989
+ "epoch": 1.2158614402917047,
2990
+ "grad_norm": 1.4394795894622803,
2991
+ "learning_rate": 8.052218057493849e-06,
2992
+ "loss": 0.4761751592159271,
2993
+ "step": 417
2994
+ },
2995
+ {
2996
+ "epoch": 1.2187784867821332,
2997
+ "grad_norm": 1.622689962387085,
2998
+ "learning_rate": 8.002301500606715e-06,
2999
+ "loss": 0.4490141272544861,
3000
+ "step": 418
3001
+ },
3002
+ {
3003
+ "epoch": 1.2216955332725616,
3004
+ "grad_norm": 1.2564961910247803,
3005
+ "learning_rate": 7.952436735211593e-06,
3006
+ "loss": 0.3964035212993622,
3007
+ "step": 419
3008
+ },
3009
+ {
3010
+ "epoch": 1.22461257976299,
3011
+ "grad_norm": 1.3248411417007446,
3012
+ "learning_rate": 7.902625054081449e-06,
3013
+ "loss": 0.46039122343063354,
3014
+ "step": 420
3015
+ },
3016
+ {
3017
+ "epoch": 1.2275296262534183,
3018
+ "grad_norm": 1.568983793258667,
3019
+ "learning_rate": 7.852867748613e-06,
3020
+ "loss": 0.49916595220565796,
3021
+ "step": 421
3022
+ },
3023
+ {
3024
+ "epoch": 1.2304466727438468,
3025
+ "grad_norm": 1.4784491062164307,
3026
+ "learning_rate": 7.803166108793243e-06,
3027
+ "loss": 0.4035068154335022,
3028
+ "step": 422
3029
+ },
3030
+ {
3031
+ "epoch": 1.2333637192342752,
3032
+ "grad_norm": 1.2940057516098022,
3033
+ "learning_rate": 7.753521423166007e-06,
3034
+ "loss": 0.4154140055179596,
3035
+ "step": 423
3036
+ },
3037
+ {
3038
+ "epoch": 1.2362807657247037,
3039
+ "grad_norm": 1.167786717414856,
3040
+ "learning_rate": 7.703934978798565e-06,
3041
+ "loss": 0.39541637897491455,
3042
+ "step": 424
3043
+ },
3044
+ {
3045
+ "epoch": 1.2391978122151321,
3046
+ "grad_norm": 1.5126771926879883,
3047
+ "learning_rate": 7.65440806124823e-06,
3048
+ "loss": 0.37744253873825073,
3049
+ "step": 425
3050
+ },
3051
+ {
3052
+ "epoch": 1.2421148587055606,
3053
+ "grad_norm": 1.2595263719558716,
3054
+ "learning_rate": 7.604941954529067e-06,
3055
+ "loss": 0.46380615234375,
3056
+ "step": 426
3057
+ },
3058
+ {
3059
+ "epoch": 1.245031905195989,
3060
+ "grad_norm": 1.4258298873901367,
3061
+ "learning_rate": 7.555537941078573e-06,
3062
+ "loss": 0.3391319513320923,
3063
+ "step": 427
3064
+ },
3065
+ {
3066
+ "epoch": 1.2479489516864175,
3067
+ "grad_norm": 1.5371774435043335,
3068
+ "learning_rate": 7.506197301724446e-06,
3069
+ "loss": 0.39805102348327637,
3070
+ "step": 428
3071
+ },
3072
+ {
3073
+ "epoch": 1.250865998176846,
3074
+ "grad_norm": 1.3789173364639282,
3075
+ "learning_rate": 7.456921315651371e-06,
3076
+ "loss": 0.37969034910202026,
3077
+ "step": 429
3078
+ },
3079
+ {
3080
+ "epoch": 1.2537830446672744,
3081
+ "grad_norm": 1.32931649684906,
3082
+ "learning_rate": 7.407711260367867e-06,
3083
+ "loss": 0.3841526508331299,
3084
+ "step": 430
3085
+ },
3086
+ {
3087
+ "epoch": 1.2567000911577029,
3088
+ "grad_norm": 1.2836817502975464,
3089
+ "learning_rate": 7.358568411673145e-06,
3090
+ "loss": 0.340289443731308,
3091
+ "step": 431
3092
+ },
3093
+ {
3094
+ "epoch": 1.2596171376481313,
3095
+ "grad_norm": 1.0418318510055542,
3096
+ "learning_rate": 7.309494043624059e-06,
3097
+ "loss": 0.44747158885002136,
3098
+ "step": 432
3099
+ },
3100
+ {
3101
+ "epoch": 1.2625341841385598,
3102
+ "grad_norm": 1.1769362688064575,
3103
+ "learning_rate": 7.260489428502058e-06,
3104
+ "loss": 0.45737382769584656,
3105
+ "step": 433
3106
+ },
3107
+ {
3108
+ "epoch": 1.265451230628988,
3109
+ "grad_norm": 2.2730748653411865,
3110
+ "learning_rate": 7.211555836780203e-06,
3111
+ "loss": 0.3827931582927704,
3112
+ "step": 434
3113
+ },
3114
+ {
3115
+ "epoch": 1.2683682771194165,
3116
+ "grad_norm": 1.263096809387207,
3117
+ "learning_rate": 7.162694537090235e-06,
3118
+ "loss": 0.3589435815811157,
3119
+ "step": 435
3120
+ },
3121
+ {
3122
+ "epoch": 1.271285323609845,
3123
+ "grad_norm": 1.4073514938354492,
3124
+ "learning_rate": 7.113906796189692e-06,
3125
+ "loss": 0.45206642150878906,
3126
+ "step": 436
3127
+ },
3128
+ {
3129
+ "epoch": 1.2742023701002734,
3130
+ "grad_norm": 1.064585566520691,
3131
+ "learning_rate": 7.0651938789290306e-06,
3132
+ "loss": 0.5409261584281921,
3133
+ "step": 437
3134
+ },
3135
+ {
3136
+ "epoch": 1.2771194165907018,
3137
+ "grad_norm": 1.2346999645233154,
3138
+ "learning_rate": 7.016557048218889e-06,
3139
+ "loss": 0.40680158138275146,
3140
+ "step": 438
3141
+ },
3142
+ {
3143
+ "epoch": 1.2800364630811303,
3144
+ "grad_norm": 1.5816547870635986,
3145
+ "learning_rate": 6.967997564997306e-06,
3146
+ "loss": 0.38718655705451965,
3147
+ "step": 439
3148
+ },
3149
+ {
3150
+ "epoch": 1.2829535095715587,
3151
+ "grad_norm": 1.085268259048462,
3152
+ "learning_rate": 6.919516688197041e-06,
3153
+ "loss": 0.4863276779651642,
3154
+ "step": 440
3155
+ },
3156
+ {
3157
+ "epoch": 1.2858705560619872,
3158
+ "grad_norm": 1.0984629392623901,
3159
+ "learning_rate": 6.871115674712937e-06,
3160
+ "loss": 0.39562875032424927,
3161
+ "step": 441
3162
+ },
3163
+ {
3164
+ "epoch": 1.2887876025524156,
3165
+ "grad_norm": 1.3004229068756104,
3166
+ "learning_rate": 6.822795779369339e-06,
3167
+ "loss": 0.44437694549560547,
3168
+ "step": 442
3169
+ },
3170
+ {
3171
+ "epoch": 1.291704649042844,
3172
+ "grad_norm": 1.3541183471679688,
3173
+ "learning_rate": 6.774558254887553e-06,
3174
+ "loss": 0.4728967249393463,
3175
+ "step": 443
3176
+ },
3177
+ {
3178
+ "epoch": 1.2946216955332726,
3179
+ "grad_norm": 1.2485377788543701,
3180
+ "learning_rate": 6.7264043518533695e-06,
3181
+ "loss": 0.4052809476852417,
3182
+ "step": 444
3183
+ },
3184
+ {
3185
+ "epoch": 1.297538742023701,
3186
+ "grad_norm": 1.412827730178833,
3187
+ "learning_rate": 6.67833531868465e-06,
3188
+ "loss": 0.40149861574172974,
3189
+ "step": 445
3190
+ },
3191
+ {
3192
+ "epoch": 1.3004557885141295,
3193
+ "grad_norm": 1.5576224327087402,
3194
+ "learning_rate": 6.630352401598953e-06,
3195
+ "loss": 0.44107240438461304,
3196
+ "step": 446
3197
+ },
3198
+ {
3199
+ "epoch": 1.303372835004558,
3200
+ "grad_norm": 1.1551047563552856,
3201
+ "learning_rate": 6.582456844581226e-06,
3202
+ "loss": 0.4898405969142914,
3203
+ "step": 447
3204
+ },
3205
+ {
3206
+ "epoch": 1.3062898814949864,
3207
+ "grad_norm": 1.9939689636230469,
3208
+ "learning_rate": 6.5346498893515645e-06,
3209
+ "loss": 0.4791329801082611,
3210
+ "step": 448
3211
+ },
3212
+ {
3213
+ "epoch": 1.3092069279854148,
3214
+ "grad_norm": 1.4782553911209106,
3215
+ "learning_rate": 6.486932775333002e-06,
3216
+ "loss": 0.472908616065979,
3217
+ "step": 449
3218
+ },
3219
+ {
3220
+ "epoch": 1.3121239744758433,
3221
+ "grad_norm": 1.2496148347854614,
3222
+ "learning_rate": 6.439306739619387e-06,
3223
+ "loss": 0.514995276927948,
3224
+ "step": 450
3225
+ },
3226
+ {
3227
+ "epoch": 1.3121239744758433,
3228
+ "eval_loss": 0.4178673028945923,
3229
+ "eval_runtime": 1197.5534,
3230
+ "eval_samples_per_second": 0.528,
3231
+ "eval_steps_per_second": 0.528,
3232
+ "step": 450
3233
+ },
3234
+ {
3235
+ "epoch": 1.3150410209662717,
3236
+ "grad_norm": 1.3996772766113281,
3237
+ "learning_rate": 6.391773016943316e-06,
3238
+ "loss": 0.4087896943092346,
3239
+ "step": 451
3240
+ },
3241
+ {
3242
+ "epoch": 1.3179580674567002,
3243
+ "grad_norm": 1.20390784740448,
3244
+ "learning_rate": 6.344332839644111e-06,
3245
+ "loss": 0.43224579095840454,
3246
+ "step": 452
3247
+ },
3248
+ {
3249
+ "epoch": 1.3208751139471286,
3250
+ "grad_norm": 1.2709496021270752,
3251
+ "learning_rate": 6.296987437635876e-06,
3252
+ "loss": 0.44104093313217163,
3253
+ "step": 453
3254
+ },
3255
+ {
3256
+ "epoch": 1.323792160437557,
3257
+ "grad_norm": 1.0112334489822388,
3258
+ "learning_rate": 6.249738038375618e-06,
3259
+ "loss": 0.47084498405456543,
3260
+ "step": 454
3261
+ },
3262
+ {
3263
+ "epoch": 1.3267092069279856,
3264
+ "grad_norm": 1.0771515369415283,
3265
+ "learning_rate": 6.202585866831411e-06,
3266
+ "loss": 0.4700928032398224,
3267
+ "step": 455
3268
+ },
3269
+ {
3270
+ "epoch": 1.3296262534184138,
3271
+ "grad_norm": 1.4937143325805664,
3272
+ "learning_rate": 6.15553214545064e-06,
3273
+ "loss": 0.345747709274292,
3274
+ "step": 456
3275
+ },
3276
+ {
3277
+ "epoch": 1.3325432999088422,
3278
+ "grad_norm": 1.1348456144332886,
3279
+ "learning_rate": 6.108578094128321e-06,
3280
+ "loss": 0.33824583888053894,
3281
+ "step": 457
3282
+ },
3283
+ {
3284
+ "epoch": 1.3354603463992707,
3285
+ "grad_norm": 1.2502707242965698,
3286
+ "learning_rate": 6.061724930175461e-06,
3287
+ "loss": 0.3528832197189331,
3288
+ "step": 458
3289
+ },
3290
+ {
3291
+ "epoch": 1.3383773928896991,
3292
+ "grad_norm": 1.5359619855880737,
3293
+ "learning_rate": 6.014973868287504e-06,
3294
+ "loss": 0.4413869082927704,
3295
+ "step": 459
3296
+ },
3297
+ {
3298
+ "epoch": 1.3412944393801276,
3299
+ "grad_norm": 0.9747081398963928,
3300
+ "learning_rate": 5.9683261205128395e-06,
3301
+ "loss": 0.6849499940872192,
3302
+ "step": 460
3303
+ },
3304
+ {
3305
+ "epoch": 1.344211485870556,
3306
+ "grad_norm": 1.3150533437728882,
3307
+ "learning_rate": 5.921782896221383e-06,
3308
+ "loss": 0.3901931047439575,
3309
+ "step": 461
3310
+ },
3311
+ {
3312
+ "epoch": 1.3471285323609845,
3313
+ "grad_norm": 1.137770652770996,
3314
+ "learning_rate": 5.875345402073207e-06,
3315
+ "loss": 0.37498384714126587,
3316
+ "step": 462
3317
+ },
3318
+ {
3319
+ "epoch": 1.350045578851413,
3320
+ "grad_norm": 1.2216367721557617,
3321
+ "learning_rate": 5.829014841987277e-06,
3322
+ "loss": 0.3874579966068268,
3323
+ "step": 463
3324
+ },
3325
+ {
3326
+ "epoch": 1.3529626253418414,
3327
+ "grad_norm": 1.135439157485962,
3328
+ "learning_rate": 5.782792417110233e-06,
3329
+ "loss": 0.384797066450119,
3330
+ "step": 464
3331
+ },
3332
+ {
3333
+ "epoch": 1.3558796718322699,
3334
+ "grad_norm": 1.2400696277618408,
3335
+ "learning_rate": 5.736679325785239e-06,
3336
+ "loss": 0.46303266286849976,
3337
+ "step": 465
3338
+ },
3339
+ {
3340
+ "epoch": 1.3587967183226983,
3341
+ "grad_norm": 1.8848882913589478,
3342
+ "learning_rate": 5.6906767635209304e-06,
3343
+ "loss": 0.5068309903144836,
3344
+ "step": 466
3345
+ },
3346
+ {
3347
+ "epoch": 1.3617137648131268,
3348
+ "grad_norm": 1.4707008600234985,
3349
+ "learning_rate": 5.644785922960412e-06,
3350
+ "loss": 0.364332914352417,
3351
+ "step": 467
3352
+ },
3353
+ {
3354
+ "epoch": 1.364630811303555,
3355
+ "grad_norm": 2.4436841011047363,
3356
+ "learning_rate": 5.599007993850329e-06,
3357
+ "loss": 0.485107421875,
3358
+ "step": 468
3359
+ },
3360
+ {
3361
+ "epoch": 1.3675478577939835,
3362
+ "grad_norm": 1.1924740076065063,
3363
+ "learning_rate": 5.553344163010039e-06,
3364
+ "loss": 0.34547489881515503,
3365
+ "step": 469
3366
+ },
3367
+ {
3368
+ "epoch": 1.370464904284412,
3369
+ "grad_norm": 1.1255877017974854,
3370
+ "learning_rate": 5.507795614300846e-06,
3371
+ "loss": 0.39645254611968994,
3372
+ "step": 470
3373
+ },
3374
+ {
3375
+ "epoch": 1.3733819507748404,
3376
+ "grad_norm": 1.0937018394470215,
3377
+ "learning_rate": 5.4623635285952815e-06,
3378
+ "loss": 0.4267856478691101,
3379
+ "step": 471
3380
+ },
3381
+ {
3382
+ "epoch": 1.3762989972652688,
3383
+ "grad_norm": 1.3355520963668823,
3384
+ "learning_rate": 5.417049083746513e-06,
3385
+ "loss": 0.3669992983341217,
3386
+ "step": 472
3387
+ },
3388
+ {
3389
+ "epoch": 1.3792160437556973,
3390
+ "grad_norm": 1.7302504777908325,
3391
+ "learning_rate": 5.3718534545578035e-06,
3392
+ "loss": 0.3873697519302368,
3393
+ "step": 473
3394
+ },
3395
+ {
3396
+ "epoch": 1.3821330902461257,
3397
+ "grad_norm": 1.17263662815094,
3398
+ "learning_rate": 5.326777812752041e-06,
3399
+ "loss": 0.4581540524959564,
3400
+ "step": 474
3401
+ },
3402
+ {
3403
+ "epoch": 1.3850501367365542,
3404
+ "grad_norm": 1.0998128652572632,
3405
+ "learning_rate": 5.281823326941377e-06,
3406
+ "loss": 0.43062761425971985,
3407
+ "step": 475
3408
+ },
3409
+ {
3410
+ "epoch": 1.3879671832269826,
3411
+ "grad_norm": 1.1194556951522827,
3412
+ "learning_rate": 5.236991162596932e-06,
3413
+ "loss": 0.381741464138031,
3414
+ "step": 476
3415
+ },
3416
+ {
3417
+ "epoch": 1.390884229717411,
3418
+ "grad_norm": 1.2759051322937012,
3419
+ "learning_rate": 5.19228248201856e-06,
3420
+ "loss": 0.49175748229026794,
3421
+ "step": 477
3422
+ },
3423
+ {
3424
+ "epoch": 1.3938012762078396,
3425
+ "grad_norm": 1.2134747505187988,
3426
+ "learning_rate": 5.147698444304732e-06,
3427
+ "loss": 0.4997562766075134,
3428
+ "step": 478
3429
+ },
3430
+ {
3431
+ "epoch": 1.396718322698268,
3432
+ "grad_norm": 1.0833078622817993,
3433
+ "learning_rate": 5.1032402053224804e-06,
3434
+ "loss": 0.42580488324165344,
3435
+ "step": 479
3436
+ },
3437
+ {
3438
+ "epoch": 1.3996353691886965,
3439
+ "grad_norm": 1.4838510751724243,
3440
+ "learning_rate": 5.058908917677426e-06,
3441
+ "loss": 0.5015593767166138,
3442
+ "step": 480
3443
+ },
3444
+ {
3445
+ "epoch": 1.402552415679125,
3446
+ "grad_norm": 1.218610167503357,
3447
+ "learning_rate": 5.014705730683904e-06,
3448
+ "loss": 0.34739193320274353,
3449
+ "step": 481
3450
+ },
3451
+ {
3452
+ "epoch": 1.4054694621695534,
3453
+ "grad_norm": 1.1883307695388794,
3454
+ "learning_rate": 4.970631790335181e-06,
3455
+ "loss": 0.41708022356033325,
3456
+ "step": 482
3457
+ },
3458
+ {
3459
+ "epoch": 1.4083865086599818,
3460
+ "grad_norm": 1.209291696548462,
3461
+ "learning_rate": 4.926688239273713e-06,
3462
+ "loss": 0.43546172976493835,
3463
+ "step": 483
3464
+ },
3465
+ {
3466
+ "epoch": 1.4113035551504103,
3467
+ "grad_norm": 1.0801606178283691,
3468
+ "learning_rate": 4.882876216761543e-06,
3469
+ "loss": 0.44491735100746155,
3470
+ "step": 484
3471
+ },
3472
+ {
3473
+ "epoch": 1.4142206016408387,
3474
+ "grad_norm": 1.2746628522872925,
3475
+ "learning_rate": 4.839196858650763e-06,
3476
+ "loss": 0.436122864484787,
3477
+ "step": 485
3478
+ },
3479
+ {
3480
+ "epoch": 1.4171376481312672,
3481
+ "grad_norm": 1.4465962648391724,
3482
+ "learning_rate": 4.795651297354056e-06,
3483
+ "loss": 0.3750447630882263,
3484
+ "step": 486
3485
+ },
3486
+ {
3487
+ "epoch": 1.4200546946216956,
3488
+ "grad_norm": 1.6736211776733398,
3489
+ "learning_rate": 4.752240661815346e-06,
3490
+ "loss": 0.38286519050598145,
3491
+ "step": 487
3492
+ },
3493
+ {
3494
+ "epoch": 1.422971741112124,
3495
+ "grad_norm": 1.1946996450424194,
3496
+ "learning_rate": 4.708966077480544e-06,
3497
+ "loss": 0.4488063156604767,
3498
+ "step": 488
3499
+ },
3500
+ {
3501
+ "epoch": 1.4258887876025526,
3502
+ "grad_norm": 1.42599356174469,
3503
+ "learning_rate": 4.665828666268335e-06,
3504
+ "loss": 0.44088613986968994,
3505
+ "step": 489
3506
+ },
3507
+ {
3508
+ "epoch": 1.4288058340929808,
3509
+ "grad_norm": 1.2281016111373901,
3510
+ "learning_rate": 4.622829546541121e-06,
3511
+ "loss": 0.4030645489692688,
3512
+ "step": 490
3513
+ },
3514
+ {
3515
+ "epoch": 1.4317228805834092,
3516
+ "grad_norm": 1.2875670194625854,
3517
+ "learning_rate": 4.57996983307602e-06,
3518
+ "loss": 0.44702020287513733,
3519
+ "step": 491
3520
+ },
3521
+ {
3522
+ "epoch": 1.4346399270738377,
3523
+ "grad_norm": 1.2456860542297363,
3524
+ "learning_rate": 4.537250637035947e-06,
3525
+ "loss": 0.4067370593547821,
3526
+ "step": 492
3527
+ },
3528
+ {
3529
+ "epoch": 1.4375569735642661,
3530
+ "grad_norm": 1.2822725772857666,
3531
+ "learning_rate": 4.494673065940833e-06,
3532
+ "loss": 0.4237740635871887,
3533
+ "step": 493
3534
+ },
3535
+ {
3536
+ "epoch": 1.4404740200546946,
3537
+ "grad_norm": 1.5517818927764893,
3538
+ "learning_rate": 4.452238223638906e-06,
3539
+ "loss": 0.40579724311828613,
3540
+ "step": 494
3541
+ },
3542
+ {
3543
+ "epoch": 1.443391066545123,
3544
+ "grad_norm": 1.275344967842102,
3545
+ "learning_rate": 4.409947210278056e-06,
3546
+ "loss": 0.38880717754364014,
3547
+ "step": 495
3548
+ },
3549
+ {
3550
+ "epoch": 1.4463081130355515,
3551
+ "grad_norm": 1.22952139377594,
3552
+ "learning_rate": 4.367801122277327e-06,
3553
+ "loss": 0.4042310416698456,
3554
+ "step": 496
3555
+ },
3556
+ {
3557
+ "epoch": 1.44922515952598,
3558
+ "grad_norm": 1.122261643409729,
3559
+ "learning_rate": 4.325801052298493e-06,
3560
+ "loss": 0.5408368110656738,
3561
+ "step": 497
3562
+ },
3563
+ {
3564
+ "epoch": 1.4521422060164084,
3565
+ "grad_norm": 1.5885361433029175,
3566
+ "learning_rate": 4.283948089217715e-06,
3567
+ "loss": 0.37697717547416687,
3568
+ "step": 498
3569
+ },
3570
+ {
3571
+ "epoch": 1.4550592525068369,
3572
+ "grad_norm": 2.3565149307250977,
3573
+ "learning_rate": 4.242243318097338e-06,
3574
+ "loss": 0.3811529576778412,
3575
+ "step": 499
3576
+ },
3577
+ {
3578
+ "epoch": 1.4579762989972653,
3579
+ "grad_norm": 1.1944137811660767,
3580
+ "learning_rate": 4.200687820157735e-06,
3581
+ "loss": 0.414781391620636,
3582
+ "step": 500
3583
+ },
3584
+ {
3585
+ "epoch": 1.4579762989972653,
3586
+ "eval_loss": 0.40706494450569153,
3587
+ "eval_runtime": 1189.1593,
3588
+ "eval_samples_per_second": 0.531,
3589
+ "eval_steps_per_second": 0.531,
3590
+ "step": 500
3591
+ }
3592
+ ],
3593
+ "logging_steps": 1,
3594
+ "max_steps": 686,
3595
+ "num_input_tokens_seen": 0,
3596
+ "num_train_epochs": 2,
3597
+ "save_steps": 100,
3598
+ "stateful_callbacks": {
3599
+ "TrainerControl": {
3600
+ "args": {
3601
+ "should_epoch_stop": false,
3602
+ "should_evaluate": false,
3603
+ "should_log": false,
3604
+ "should_save": true,
3605
+ "should_training_stop": false
3606
+ },
3607
+ "attributes": {}
3608
+ }
3609
+ },
3610
+ "total_flos": 4.6050925490932285e+18,
3611
+ "train_batch_size": 1,
3612
+ "trial_name": null,
3613
+ "trial_params": null
3614
+ }
cpt_devstral_24B/checkpoints/checkpoint-500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f62526ec2433add7ac031c48b1f6ff360f1ade77275765112cbf7cf361d64ca5
3
+ size 5201
cpt_devstral_24B/checkpoints/checkpoint-600/README.md ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: /workspace/Models/Devstral-Small-2-24B-Instruct-2512
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:/workspace/Models/Devstral-Small-2-24B-Instruct-2512
7
+ - lora
8
+ - transformers
9
+ ---
10
+
11
+ # Model Card for Model ID
12
+
13
+ <!-- Provide a quick summary of what the model is/does. -->
14
+
15
+
16
+
17
+ ## Model Details
18
+
19
+ ### Model Description
20
+
21
+ <!-- Provide a longer summary of what this model is. -->
22
+
23
+
24
+
25
+ - **Developed by:** [More Information Needed]
26
+ - **Funded by [optional]:** [More Information Needed]
27
+ - **Shared by [optional]:** [More Information Needed]
28
+ - **Model type:** [More Information Needed]
29
+ - **Language(s) (NLP):** [More Information Needed]
30
+ - **License:** [More Information Needed]
31
+ - **Finetuned from model [optional]:** [More Information Needed]
32
+
33
+ ### Model Sources [optional]
34
+
35
+ <!-- Provide the basic links for the model. -->
36
+
37
+ - **Repository:** [More Information Needed]
38
+ - **Paper [optional]:** [More Information Needed]
39
+ - **Demo [optional]:** [More Information Needed]
40
+
41
+ ## Uses
42
+
43
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
44
+
45
+ ### Direct Use
46
+
47
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
48
+
49
+ [More Information Needed]
50
+
51
+ ### Downstream Use [optional]
52
+
53
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
54
+
55
+ [More Information Needed]
56
+
57
+ ### Out-of-Scope Use
58
+
59
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
60
+
61
+ [More Information Needed]
62
+
63
+ ## Bias, Risks, and Limitations
64
+
65
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
66
+
67
+ [More Information Needed]
68
+
69
+ ### Recommendations
70
+
71
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
72
+
73
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
74
+
75
+ ## How to Get Started with the Model
76
+
77
+ Use the code below to get started with the model.
78
+
79
+ [More Information Needed]
80
+
81
+ ## Training Details
82
+
83
+ ### Training Data
84
+
85
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
86
+
87
+ [More Information Needed]
88
+
89
+ ### Training Procedure
90
+
91
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
92
+
93
+ #### Preprocessing [optional]
94
+
95
+ [More Information Needed]
96
+
97
+
98
+ #### Training Hyperparameters
99
+
100
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
101
+
102
+ #### Speeds, Sizes, Times [optional]
103
+
104
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
105
+
106
+ [More Information Needed]
107
+
108
+ ## Evaluation
109
+
110
+ <!-- This section describes the evaluation protocols and provides the results. -->
111
+
112
+ ### Testing Data, Factors & Metrics
113
+
114
+ #### Testing Data
115
+
116
+ <!-- This should link to a Dataset Card if possible. -->
117
+
118
+ [More Information Needed]
119
+
120
+ #### Factors
121
+
122
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
123
+
124
+ [More Information Needed]
125
+
126
+ #### Metrics
127
+
128
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
129
+
130
+ [More Information Needed]
131
+
132
+ ### Results
133
+
134
+ [More Information Needed]
135
+
136
+ #### Summary
137
+
138
+
139
+
140
+ ## Model Examination [optional]
141
+
142
+ <!-- Relevant interpretability work for the model goes here -->
143
+
144
+ [More Information Needed]
145
+
146
+ ## Environmental Impact
147
+
148
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
149
+
150
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
151
+
152
+ - **Hardware Type:** [More Information Needed]
153
+ - **Hours used:** [More Information Needed]
154
+ - **Cloud Provider:** [More Information Needed]
155
+ - **Compute Region:** [More Information Needed]
156
+ - **Carbon Emitted:** [More Information Needed]
157
+
158
+ ## Technical Specifications [optional]
159
+
160
+ ### Model Architecture and Objective
161
+
162
+ [More Information Needed]
163
+
164
+ ### Compute Infrastructure
165
+
166
+ [More Information Needed]
167
+
168
+ #### Hardware
169
+
170
+ [More Information Needed]
171
+
172
+ #### Software
173
+
174
+ [More Information Needed]
175
+
176
+ ## Citation [optional]
177
+
178
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
179
+
180
+ **BibTeX:**
181
+
182
+ [More Information Needed]
183
+
184
+ **APA:**
185
+
186
+ [More Information Needed]
187
+
188
+ ## Glossary [optional]
189
+
190
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
191
+
192
+ [More Information Needed]
193
+
194
+ ## More Information [optional]
195
+
196
+ [More Information Needed]
197
+
198
+ ## Model Card Authors [optional]
199
+
200
+ [More Information Needed]
201
+
202
+ ## Model Card Contact
203
+
204
+ [More Information Needed]
205
+ ### Framework versions
206
+
207
+ - PEFT 0.18.0
cpt_devstral_24B/checkpoints/checkpoint-600/adapter_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "/workspace/Models/Devstral-Small-2-24B-Instruct-2512",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 128,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.0",
27
+ "qalora_group_size": 16,
28
+ "r": 64,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "k_proj",
33
+ "o_proj",
34
+ "q_proj",
35
+ "v_proj"
36
+ ],
37
+ "target_parameters": null,
38
+ "task_type": "CAUSAL_LM",
39
+ "trainable_token_indices": null,
40
+ "use_dora": false,
41
+ "use_qalora": false,
42
+ "use_rslora": false
43
+ }
cpt_devstral_24B/checkpoints/checkpoint-600/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6528dd74de4fce9bff6c944acd9bc01868d155b1ea5403fe93fb8c5ced4d4ec
3
+ size 364983848
cpt_devstral_24B/checkpoints/checkpoint-600/chat_template.jinja ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {#- Default system message if no system prompt is passed. #}
2
+ {%- set default_system_message = '' %}
3
+
4
+ {#- Begin of sequence token. #}
5
+ {{- bos_token }}
6
+
7
+ {#- Handle system prompt if it exists. #}
8
+ {#- System prompt supports text content or text chunks. #}
9
+ {%- if messages[0]['role'] == 'system' %}
10
+ {{- '[SYSTEM_PROMPT]' -}}
11
+ {%- if messages[0]['content'] is string %}
12
+ {{- messages[0]['content'] -}}
13
+ {%- else %}
14
+ {%- for block in messages[0]['content'] %}
15
+ {%- if block['type'] == 'text' %}
16
+ {{- block['text'] }}
17
+ {%- else %}
18
+ {{- raise_exception('Only text chunks are supported in system message contents.') }}
19
+ {%- endif %}
20
+ {%- endfor %}
21
+ {%- endif %}
22
+ {{- '[/SYSTEM_PROMPT]' -}}
23
+ {%- set loop_messages = messages[1:] %}
24
+ {%- else %}
25
+ {%- set loop_messages = messages %}
26
+ {%- if default_system_message != '' %}
27
+ {{- '[SYSTEM_PROMPT]' + default_system_message + '[/SYSTEM_PROMPT]' }}
28
+ {%- endif %}
29
+ {%- endif %}
30
+
31
+
32
+ {#- Tools definition #}
33
+ {%- set tools_definition = '' %}
34
+ {%- set has_tools = false %}
35
+ {%- if tools is defined and tools is not none and tools|length > 0 %}
36
+ {%- set has_tools = true %}
37
+ {%- set tools_definition = '[AVAILABLE_TOOLS]' + (tools| tojson) + '[/AVAILABLE_TOOLS]' %}
38
+ {{- tools_definition }}
39
+ {%- endif %}
40
+
41
+ {#- Checks for alternating user/assistant messages. #}
42
+ {%- set ns = namespace(index=0) %}
43
+ {%- for message in loop_messages %}
44
+ {%- if message.role == 'user' or (message.role == 'assistant' and (message.tool_calls is not defined or message.tool_calls is none or message.tool_calls | length == 0)) %}
45
+ {%- if (message['role'] == 'user') != (ns.index % 2 == 0) %}
46
+ {{- raise_exception('After the optional system message, conversation roles must alternate user and assistant roles except for tool calls and results.') }}
47
+ {%- endif %}
48
+ {%- set ns.index = ns.index + 1 %}
49
+ {%- endif %}
50
+ {%- endfor %}
51
+
52
+ {#- Handle conversation messages. #}
53
+ {%- for message in loop_messages %}
54
+
55
+ {#- User messages support text content or text and image chunks. #}
56
+ {%- if message['role'] == 'user' %}
57
+ {%- if message['content'] is string %}
58
+ {{- '[INST]' + message['content'] + '[/INST]' }}
59
+ {%- elif message['content'] | length > 0 %}
60
+ {{- '[INST]' }}
61
+ {%- if message['content'] | length == 2 %}
62
+ {%- set blocks = message['content'] | sort(attribute='type') %}
63
+ {%- else %}
64
+ {%- set blocks = message['content'] %}
65
+ {%- endif %}
66
+ {%- for block in blocks %}
67
+ {%- if block['type'] == 'text' %}
68
+ {{- block['text'] }}
69
+ {%- elif block['type'] in ['image', 'image_url'] %}
70
+ {{- '[IMG]' }}
71
+ {%- else %}
72
+ {{- raise_exception('Only text, image and image_url chunks are supported in user message content.') }}
73
+ {%- endif %}
74
+ {%- endfor %}
75
+ {{- '[/INST]' }}
76
+ {%- else %}
77
+ {{- raise_exception('User message must have a string or a list of chunks in content') }}
78
+ {%- endif %}
79
+
80
+ {#- Assistant messages support string content or a list of text chunks only; any other chunk type raises an exception. #}
81
+ {%- elif message['role'] == 'assistant' %}
82
+ {%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}
83
+ {{- raise_exception('Assistant message must have a string or a list of chunks in content or a list of tool calls.') }}
84
+ {%- endif %}
85
+
86
+ {%- if message['content'] is string %}
87
+ {{- message['content'] }}
88
+ {%- elif message['content'] | length > 0 %}
89
+ {%- for block in message['content'] %}
90
+ {%- if block['type'] == 'text' %}
91
+ {{- block['text'] }}
92
+ {%- else %}
93
+ {{- raise_exception('Only text chunks are supported in assistant message contents.') }}
94
+ {%- endif %}
95
+ {%- endfor %}
96
+ {%- endif %}
97
+
98
+ {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 %}
99
+ {%- for tool in message['tool_calls'] %}
100
+ {%- set arguments = tool['function']['arguments'] %}
101
+ {%- if arguments is not string %}
102
+ {%- set arguments = arguments|tojson|safe %}
103
+ {%- elif arguments == '' %}
104
+ {%- set arguments = '{}' %}
105
+ {%- endif %}
106
+ {{- '[TOOL_CALLS]' + tool['function']['name'] + '[ARGS]' + arguments }}
107
+ {%- endfor %}
108
+ {%- endif %}
109
+
110
+ {#- End of sequence token after each assistant message. #}
111
+ {{- eos_token }}
112
+
113
+ {#- Tool messages only support text content. #}
114
+ {%- elif message['role'] == 'tool' %}
115
+ {{- '[TOOL_RESULTS]' + message['content']|string + '[/TOOL_RESULTS]' }}
116
+
117
+ {#- Raise exception for unsupported roles. #}
118
+ {%- else %}
119
+ {{- raise_exception('Only user, assistant and tool roles are supported, got ' + message['role'] + '.') }}
120
+ {%- endif %}
121
+ {%- endfor %}
cpt_devstral_24B/checkpoints/checkpoint-600/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed9b2e44d0ab7459e766b2b426fe5e300025849ada2eb46e1e2d89ca430a99f5
3
+ size 160131559
cpt_devstral_24B/checkpoints/checkpoint-600/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:225b67663a6b759f77f860fb03e0bd5eaf5759053344c810157aab3c54e1e986
3
+ size 14645
cpt_devstral_24B/checkpoints/checkpoint-600/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e3d2efb23ab02a563acff2988a53e730d4e5d08f3c1c39f1bd998cc5047ea45
3
+ size 1465
cpt_devstral_24B/checkpoints/checkpoint-600/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286acad9b0e27fce778ac429763536accf618ccb6ed72963b6f94685e531c5c7
3
+ size 17077402
cpt_devstral_24B/checkpoints/checkpoint-600/tokenizer_config.json ADDED
@@ -0,0 +1,1013 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "extra_special_tokens": [
6
+ "<unk>",
7
+ "<s>",
8
+ "</s>",
9
+ "[INST]",
10
+ "[/INST]",
11
+ "[AVAILABLE_TOOLS]",
12
+ "[/AVAILABLE_TOOLS]",
13
+ "[TOOL_RESULTS]",
14
+ "[/TOOL_RESULTS]",
15
+ "[TOOL_CALLS]",
16
+ "[IMG]",
17
+ "<pad>",
18
+ "[IMG_BREAK]",
19
+ "[IMG_END]",
20
+ "[PREFIX]",
21
+ "[MIDDLE]",
22
+ "[SUFFIX]",
23
+ "[SYSTEM_PROMPT]",
24
+ "[/SYSTEM_PROMPT]",
25
+ "[TOOL_CONTENT]",
26
+ "<SPECIAL_20>",
27
+ "<SPECIAL_21>",
28
+ "<SPECIAL_22>",
29
+ "<SPECIAL_23>",
30
+ "[AUDIO]",
31
+ "[BEGIN_AUDIO]",
32
+ "<SPECIAL_26>",
33
+ "<SPECIAL_27>",
34
+ "<SPECIAL_28>",
35
+ "<SPECIAL_29>",
36
+ "<SPECIAL_30>",
37
+ "<SPECIAL_31>",
38
+ "[ARGS]",
39
+ "[CALL_ID]",
40
+ "[THINK]",
41
+ "[/THINK]",
42
+ "<SPECIAL_36>",
43
+ "<SPECIAL_37>",
44
+ "<SPECIAL_38>",
45
+ "<SPECIAL_39>",
46
+ "<SPECIAL_40>",
47
+ "<SPECIAL_41>",
48
+ "<SPECIAL_42>",
49
+ "<SPECIAL_43>",
50
+ "<SPECIAL_44>",
51
+ "<SPECIAL_45>",
52
+ "<SPECIAL_46>",
53
+ "<SPECIAL_47>",
54
+ "<SPECIAL_48>",
55
+ "<SPECIAL_49>",
56
+ "<SPECIAL_50>",
57
+ "<SPECIAL_51>",
58
+ "<SPECIAL_52>",
59
+ "<SPECIAL_53>",
60
+ "<SPECIAL_54>",
61
+ "<SPECIAL_55>",
62
+ "<SPECIAL_56>",
63
+ "<SPECIAL_57>",
64
+ "<SPECIAL_58>",
65
+ "<SPECIAL_59>",
66
+ "<SPECIAL_60>",
67
+ "<SPECIAL_61>",
68
+ "<SPECIAL_62>",
69
+ "<SPECIAL_63>",
70
+ "<SPECIAL_64>",
71
+ "<SPECIAL_65>",
72
+ "<SPECIAL_66>",
73
+ "<SPECIAL_67>",
74
+ "<SPECIAL_68>",
75
+ "<SPECIAL_69>",
76
+ "<SPECIAL_70>",
77
+ "<SPECIAL_71>",
78
+ "<SPECIAL_72>",
79
+ "<SPECIAL_73>",
80
+ "<SPECIAL_74>",
81
+ "<SPECIAL_75>",
82
+ "<SPECIAL_76>",
83
+ "<SPECIAL_77>",
84
+ "<SPECIAL_78>",
85
+ "<SPECIAL_79>",
86
+ "<SPECIAL_80>",
87
+ "<SPECIAL_81>",
88
+ "<SPECIAL_82>",
89
+ "<SPECIAL_83>",
90
+ "<SPECIAL_84>",
91
+ "<SPECIAL_85>",
92
+ "<SPECIAL_86>",
93
+ "<SPECIAL_87>",
94
+ "<SPECIAL_88>",
95
+ "<SPECIAL_89>",
96
+ "<SPECIAL_90>",
97
+ "<SPECIAL_91>",
98
+ "<SPECIAL_92>",
99
+ "<SPECIAL_93>",
100
+ "<SPECIAL_94>",
101
+ "<SPECIAL_95>",
102
+ "<SPECIAL_96>",
103
+ "<SPECIAL_97>",
104
+ "<SPECIAL_98>",
105
+ "<SPECIAL_99>",
106
+ "<SPECIAL_100>",
107
+ "<SPECIAL_101>",
108
+ "<SPECIAL_102>",
109
+ "<SPECIAL_103>",
110
+ "<SPECIAL_104>",
111
+ "<SPECIAL_105>",
112
+ "<SPECIAL_106>",
113
+ "<SPECIAL_107>",
114
+ "<SPECIAL_108>",
115
+ "<SPECIAL_109>",
116
+ "<SPECIAL_110>",
117
+ "<SPECIAL_111>",
118
+ "<SPECIAL_112>",
119
+ "<SPECIAL_113>",
120
+ "<SPECIAL_114>",
121
+ "<SPECIAL_115>",
122
+ "<SPECIAL_116>",
123
+ "<SPECIAL_117>",
124
+ "<SPECIAL_118>",
125
+ "<SPECIAL_119>",
126
+ "<SPECIAL_120>",
127
+ "<SPECIAL_121>",
128
+ "<SPECIAL_122>",
129
+ "<SPECIAL_123>",
130
+ "<SPECIAL_124>",
131
+ "<SPECIAL_125>",
132
+ "<SPECIAL_126>",
133
+ "<SPECIAL_127>",
134
+ "<SPECIAL_128>",
135
+ "<SPECIAL_129>",
136
+ "<SPECIAL_130>",
137
+ "<SPECIAL_131>",
138
+ "<SPECIAL_132>",
139
+ "<SPECIAL_133>",
140
+ "<SPECIAL_134>",
141
+ "<SPECIAL_135>",
142
+ "<SPECIAL_136>",
143
+ "<SPECIAL_137>",
144
+ "<SPECIAL_138>",
145
+ "<SPECIAL_139>",
146
+ "<SPECIAL_140>",
147
+ "<SPECIAL_141>",
148
+ "<SPECIAL_142>",
149
+ "<SPECIAL_143>",
150
+ "<SPECIAL_144>",
151
+ "<SPECIAL_145>",
152
+ "<SPECIAL_146>",
153
+ "<SPECIAL_147>",
154
+ "<SPECIAL_148>",
155
+ "<SPECIAL_149>",
156
+ "<SPECIAL_150>",
157
+ "<SPECIAL_151>",
158
+ "<SPECIAL_152>",
159
+ "<SPECIAL_153>",
160
+ "<SPECIAL_154>",
161
+ "<SPECIAL_155>",
162
+ "<SPECIAL_156>",
163
+ "<SPECIAL_157>",
164
+ "<SPECIAL_158>",
165
+ "<SPECIAL_159>",
166
+ "<SPECIAL_160>",
167
+ "<SPECIAL_161>",
168
+ "<SPECIAL_162>",
169
+ "<SPECIAL_163>",
170
+ "<SPECIAL_164>",
171
+ "<SPECIAL_165>",
172
+ "<SPECIAL_166>",
173
+ "<SPECIAL_167>",
174
+ "<SPECIAL_168>",
175
+ "<SPECIAL_169>",
176
+ "<SPECIAL_170>",
177
+ "<SPECIAL_171>",
178
+ "<SPECIAL_172>",
179
+ "<SPECIAL_173>",
180
+ "<SPECIAL_174>",
181
+ "<SPECIAL_175>",
182
+ "<SPECIAL_176>",
183
+ "<SPECIAL_177>",
184
+ "<SPECIAL_178>",
185
+ "<SPECIAL_179>",
186
+ "<SPECIAL_180>",
187
+ "<SPECIAL_181>",
188
+ "<SPECIAL_182>",
189
+ "<SPECIAL_183>",
190
+ "<SPECIAL_184>",
191
+ "<SPECIAL_185>",
192
+ "<SPECIAL_186>",
193
+ "<SPECIAL_187>",
194
+ "<SPECIAL_188>",
195
+ "<SPECIAL_189>",
196
+ "<SPECIAL_190>",
197
+ "<SPECIAL_191>",
198
+ "<SPECIAL_192>",
199
+ "<SPECIAL_193>",
200
+ "<SPECIAL_194>",
201
+ "<SPECIAL_195>",
202
+ "<SPECIAL_196>",
203
+ "<SPECIAL_197>",
204
+ "<SPECIAL_198>",
205
+ "<SPECIAL_199>",
206
+ "<SPECIAL_200>",
207
+ "<SPECIAL_201>",
208
+ "<SPECIAL_202>",
209
+ "<SPECIAL_203>",
210
+ "<SPECIAL_204>",
211
+ "<SPECIAL_205>",
212
+ "<SPECIAL_206>",
213
+ "<SPECIAL_207>",
214
+ "<SPECIAL_208>",
215
+ "<SPECIAL_209>",
216
+ "<SPECIAL_210>",
217
+ "<SPECIAL_211>",
218
+ "<SPECIAL_212>",
219
+ "<SPECIAL_213>",
220
+ "<SPECIAL_214>",
221
+ "<SPECIAL_215>",
222
+ "<SPECIAL_216>",
223
+ "<SPECIAL_217>",
224
+ "<SPECIAL_218>",
225
+ "<SPECIAL_219>",
226
+ "<SPECIAL_220>",
227
+ "<SPECIAL_221>",
228
+ "<SPECIAL_222>",
229
+ "<SPECIAL_223>",
230
+ "<SPECIAL_224>",
231
+ "<SPECIAL_225>",
232
+ "<SPECIAL_226>",
233
+ "<SPECIAL_227>",
234
+ "<SPECIAL_228>",
235
+ "<SPECIAL_229>",
236
+ "<SPECIAL_230>",
237
+ "<SPECIAL_231>",
238
+ "<SPECIAL_232>",
239
+ "<SPECIAL_233>",
240
+ "<SPECIAL_234>",
241
+ "<SPECIAL_235>",
242
+ "<SPECIAL_236>",
243
+ "<SPECIAL_237>",
244
+ "<SPECIAL_238>",
245
+ "<SPECIAL_239>",
246
+ "<SPECIAL_240>",
247
+ "<SPECIAL_241>",
248
+ "<SPECIAL_242>",
249
+ "<SPECIAL_243>",
250
+ "<SPECIAL_244>",
251
+ "<SPECIAL_245>",
252
+ "<SPECIAL_246>",
253
+ "<SPECIAL_247>",
254
+ "<SPECIAL_248>",
255
+ "<SPECIAL_249>",
256
+ "<SPECIAL_250>",
257
+ "<SPECIAL_251>",
258
+ "<SPECIAL_252>",
259
+ "<SPECIAL_253>",
260
+ "<SPECIAL_254>",
261
+ "<SPECIAL_255>",
262
+ "<SPECIAL_256>",
263
+ "<SPECIAL_257>",
264
+ "<SPECIAL_258>",
265
+ "<SPECIAL_259>",
266
+ "<SPECIAL_260>",
267
+ "<SPECIAL_261>",
268
+ "<SPECIAL_262>",
269
+ "<SPECIAL_263>",
270
+ "<SPECIAL_264>",
271
+ "<SPECIAL_265>",
272
+ "<SPECIAL_266>",
273
+ "<SPECIAL_267>",
274
+ "<SPECIAL_268>",
275
+ "<SPECIAL_269>",
276
+ "<SPECIAL_270>",
277
+ "<SPECIAL_271>",
278
+ "<SPECIAL_272>",
279
+ "<SPECIAL_273>",
280
+ "<SPECIAL_274>",
281
+ "<SPECIAL_275>",
282
+ "<SPECIAL_276>",
283
+ "<SPECIAL_277>",
284
+ "<SPECIAL_278>",
285
+ "<SPECIAL_279>",
286
+ "<SPECIAL_280>",
287
+ "<SPECIAL_281>",
288
+ "<SPECIAL_282>",
289
+ "<SPECIAL_283>",
290
+ "<SPECIAL_284>",
291
+ "<SPECIAL_285>",
292
+ "<SPECIAL_286>",
293
+ "<SPECIAL_287>",
294
+ "<SPECIAL_288>",
295
+ "<SPECIAL_289>",
296
+ "<SPECIAL_290>",
297
+ "<SPECIAL_291>",
298
+ "<SPECIAL_292>",
299
+ "<SPECIAL_293>",
300
+ "<SPECIAL_294>",
301
+ "<SPECIAL_295>",
302
+ "<SPECIAL_296>",
303
+ "<SPECIAL_297>",
304
+ "<SPECIAL_298>",
305
+ "<SPECIAL_299>",
306
+ "<SPECIAL_300>",
307
+ "<SPECIAL_301>",
308
+ "<SPECIAL_302>",
309
+ "<SPECIAL_303>",
310
+ "<SPECIAL_304>",
311
+ "<SPECIAL_305>",
312
+ "<SPECIAL_306>",
313
+ "<SPECIAL_307>",
314
+ "<SPECIAL_308>",
315
+ "<SPECIAL_309>",
316
+ "<SPECIAL_310>",
317
+ "<SPECIAL_311>",
318
+ "<SPECIAL_312>",
319
+ "<SPECIAL_313>",
320
+ "<SPECIAL_314>",
321
+ "<SPECIAL_315>",
322
+ "<SPECIAL_316>",
323
+ "<SPECIAL_317>",
324
+ "<SPECIAL_318>",
325
+ "<SPECIAL_319>",
326
+ "<SPECIAL_320>",
327
+ "<SPECIAL_321>",
328
+ "<SPECIAL_322>",
329
+ "<SPECIAL_323>",
330
+ "<SPECIAL_324>",
331
+ "<SPECIAL_325>",
332
+ "<SPECIAL_326>",
333
+ "<SPECIAL_327>",
334
+ "<SPECIAL_328>",
335
+ "<SPECIAL_329>",
336
+ "<SPECIAL_330>",
337
+ "<SPECIAL_331>",
338
+ "<SPECIAL_332>",
339
+ "<SPECIAL_333>",
340
+ "<SPECIAL_334>",
341
+ "<SPECIAL_335>",
342
+ "<SPECIAL_336>",
343
+ "<SPECIAL_337>",
344
+ "<SPECIAL_338>",
345
+ "<SPECIAL_339>",
346
+ "<SPECIAL_340>",
347
+ "<SPECIAL_341>",
348
+ "<SPECIAL_342>",
349
+ "<SPECIAL_343>",
350
+ "<SPECIAL_344>",
351
+ "<SPECIAL_345>",
352
+ "<SPECIAL_346>",
353
+ "<SPECIAL_347>",
354
+ "<SPECIAL_348>",
355
+ "<SPECIAL_349>",
356
+ "<SPECIAL_350>",
357
+ "<SPECIAL_351>",
358
+ "<SPECIAL_352>",
359
+ "<SPECIAL_353>",
360
+ "<SPECIAL_354>",
361
+ "<SPECIAL_355>",
362
+ "<SPECIAL_356>",
363
+ "<SPECIAL_357>",
364
+ "<SPECIAL_358>",
365
+ "<SPECIAL_359>",
366
+ "<SPECIAL_360>",
367
+ "<SPECIAL_361>",
368
+ "<SPECIAL_362>",
369
+ "<SPECIAL_363>",
370
+ "<SPECIAL_364>",
371
+ "<SPECIAL_365>",
372
+ "<SPECIAL_366>",
373
+ "<SPECIAL_367>",
374
+ "<SPECIAL_368>",
375
+ "<SPECIAL_369>",
376
+ "<SPECIAL_370>",
377
+ "<SPECIAL_371>",
378
+ "<SPECIAL_372>",
379
+ "<SPECIAL_373>",
380
+ "<SPECIAL_374>",
381
+ "<SPECIAL_375>",
382
+ "<SPECIAL_376>",
383
+ "<SPECIAL_377>",
384
+ "<SPECIAL_378>",
385
+ "<SPECIAL_379>",
386
+ "<SPECIAL_380>",
387
+ "<SPECIAL_381>",
388
+ "<SPECIAL_382>",
389
+ "<SPECIAL_383>",
390
+ "<SPECIAL_384>",
391
+ "<SPECIAL_385>",
392
+ "<SPECIAL_386>",
393
+ "<SPECIAL_387>",
394
+ "<SPECIAL_388>",
395
+ "<SPECIAL_389>",
396
+ "<SPECIAL_390>",
397
+ "<SPECIAL_391>",
398
+ "<SPECIAL_392>",
399
+ "<SPECIAL_393>",
400
+ "<SPECIAL_394>",
401
+ "<SPECIAL_395>",
402
+ "<SPECIAL_396>",
403
+ "<SPECIAL_397>",
404
+ "<SPECIAL_398>",
405
+ "<SPECIAL_399>",
406
+ "<SPECIAL_400>",
407
+ "<SPECIAL_401>",
408
+ "<SPECIAL_402>",
409
+ "<SPECIAL_403>",
410
+ "<SPECIAL_404>",
411
+ "<SPECIAL_405>",
412
+ "<SPECIAL_406>",
413
+ "<SPECIAL_407>",
414
+ "<SPECIAL_408>",
415
+ "<SPECIAL_409>",
416
+ "<SPECIAL_410>",
417
+ "<SPECIAL_411>",
418
+ "<SPECIAL_412>",
419
+ "<SPECIAL_413>",
420
+ "<SPECIAL_414>",
421
+ "<SPECIAL_415>",
422
+ "<SPECIAL_416>",
423
+ "<SPECIAL_417>",
424
+ "<SPECIAL_418>",
425
+ "<SPECIAL_419>",
426
+ "<SPECIAL_420>",
427
+ "<SPECIAL_421>",
428
+ "<SPECIAL_422>",
429
+ "<SPECIAL_423>",
430
+ "<SPECIAL_424>",
431
+ "<SPECIAL_425>",
432
+ "<SPECIAL_426>",
433
+ "<SPECIAL_427>",
434
+ "<SPECIAL_428>",
435
+ "<SPECIAL_429>",
436
+ "<SPECIAL_430>",
437
+ "<SPECIAL_431>",
438
+ "<SPECIAL_432>",
439
+ "<SPECIAL_433>",
440
+ "<SPECIAL_434>",
441
+ "<SPECIAL_435>",
442
+ "<SPECIAL_436>",
443
+ "<SPECIAL_437>",
444
+ "<SPECIAL_438>",
445
+ "<SPECIAL_439>",
446
+ "<SPECIAL_440>",
447
+ "<SPECIAL_441>",
448
+ "<SPECIAL_442>",
449
+ "<SPECIAL_443>",
450
+ "<SPECIAL_444>",
451
+ "<SPECIAL_445>",
452
+ "<SPECIAL_446>",
453
+ "<SPECIAL_447>",
454
+ "<SPECIAL_448>",
455
+ "<SPECIAL_449>",
456
+ "<SPECIAL_450>",
457
+ "<SPECIAL_451>",
458
+ "<SPECIAL_452>",
459
+ "<SPECIAL_453>",
460
+ "<SPECIAL_454>",
461
+ "<SPECIAL_455>",
462
+ "<SPECIAL_456>",
463
+ "<SPECIAL_457>",
464
+ "<SPECIAL_458>",
465
+ "<SPECIAL_459>",
466
+ "<SPECIAL_460>",
467
+ "<SPECIAL_461>",
468
+ "<SPECIAL_462>",
469
+ "<SPECIAL_463>",
470
+ "<SPECIAL_464>",
471
+ "<SPECIAL_465>",
472
+ "<SPECIAL_466>",
473
+ "<SPECIAL_467>",
474
+ "<SPECIAL_468>",
475
+ "<SPECIAL_469>",
476
+ "<SPECIAL_470>",
477
+ "<SPECIAL_471>",
478
+ "<SPECIAL_472>",
479
+ "<SPECIAL_473>",
480
+ "<SPECIAL_474>",
481
+ "<SPECIAL_475>",
482
+ "<SPECIAL_476>",
483
+ "<SPECIAL_477>",
484
+ "<SPECIAL_478>",
485
+ "<SPECIAL_479>",
486
+ "<SPECIAL_480>",
487
+ "<SPECIAL_481>",
488
+ "<SPECIAL_482>",
489
+ "<SPECIAL_483>",
490
+ "<SPECIAL_484>",
491
+ "<SPECIAL_485>",
492
+ "<SPECIAL_486>",
493
+ "<SPECIAL_487>",
494
+ "<SPECIAL_488>",
495
+ "<SPECIAL_489>",
496
+ "<SPECIAL_490>",
497
+ "<SPECIAL_491>",
498
+ "<SPECIAL_492>",
499
+ "<SPECIAL_493>",
500
+ "<SPECIAL_494>",
501
+ "<SPECIAL_495>",
502
+ "<SPECIAL_496>",
503
+ "<SPECIAL_497>",
504
+ "<SPECIAL_498>",
505
+ "<SPECIAL_499>",
506
+ "<SPECIAL_500>",
507
+ "<SPECIAL_501>",
508
+ "<SPECIAL_502>",
509
+ "<SPECIAL_503>",
510
+ "<SPECIAL_504>",
511
+ "<SPECIAL_505>",
512
+ "<SPECIAL_506>",
513
+ "<SPECIAL_507>",
514
+ "<SPECIAL_508>",
515
+ "<SPECIAL_509>",
516
+ "<SPECIAL_510>",
517
+ "<SPECIAL_511>",
518
+ "<SPECIAL_512>",
519
+ "<SPECIAL_513>",
520
+ "<SPECIAL_514>",
521
+ "<SPECIAL_515>",
522
+ "<SPECIAL_516>",
523
+ "<SPECIAL_517>",
524
+ "<SPECIAL_518>",
525
+ "<SPECIAL_519>",
526
+ "<SPECIAL_520>",
527
+ "<SPECIAL_521>",
528
+ "<SPECIAL_522>",
529
+ "<SPECIAL_523>",
530
+ "<SPECIAL_524>",
531
+ "<SPECIAL_525>",
532
+ "<SPECIAL_526>",
533
+ "<SPECIAL_527>",
534
+ "<SPECIAL_528>",
535
+ "<SPECIAL_529>",
536
+ "<SPECIAL_530>",
537
+ "<SPECIAL_531>",
538
+ "<SPECIAL_532>",
539
+ "<SPECIAL_533>",
540
+ "<SPECIAL_534>",
541
+ "<SPECIAL_535>",
542
+ "<SPECIAL_536>",
543
+ "<SPECIAL_537>",
544
+ "<SPECIAL_538>",
545
+ "<SPECIAL_539>",
546
+ "<SPECIAL_540>",
547
+ "<SPECIAL_541>",
548
+ "<SPECIAL_542>",
549
+ "<SPECIAL_543>",
550
+ "<SPECIAL_544>",
551
+ "<SPECIAL_545>",
552
+ "<SPECIAL_546>",
553
+ "<SPECIAL_547>",
554
+ "<SPECIAL_548>",
555
+ "<SPECIAL_549>",
556
+ "<SPECIAL_550>",
557
+ "<SPECIAL_551>",
558
+ "<SPECIAL_552>",
559
+ "<SPECIAL_553>",
560
+ "<SPECIAL_554>",
561
+ "<SPECIAL_555>",
562
+ "<SPECIAL_556>",
563
+ "<SPECIAL_557>",
564
+ "<SPECIAL_558>",
565
+ "<SPECIAL_559>",
566
+ "<SPECIAL_560>",
567
+ "<SPECIAL_561>",
568
+ "<SPECIAL_562>",
569
+ "<SPECIAL_563>",
570
+ "<SPECIAL_564>",
571
+ "<SPECIAL_565>",
572
+ "<SPECIAL_566>",
573
+ "<SPECIAL_567>",
574
+ "<SPECIAL_568>",
575
+ "<SPECIAL_569>",
576
+ "<SPECIAL_570>",
577
+ "<SPECIAL_571>",
578
+ "<SPECIAL_572>",
579
+ "<SPECIAL_573>",
580
+ "<SPECIAL_574>",
581
+ "<SPECIAL_575>",
582
+ "<SPECIAL_576>",
583
+ "<SPECIAL_577>",
584
+ "<SPECIAL_578>",
585
+ "<SPECIAL_579>",
586
+ "<SPECIAL_580>",
587
+ "<SPECIAL_581>",
588
+ "<SPECIAL_582>",
589
+ "<SPECIAL_583>",
590
+ "<SPECIAL_584>",
591
+ "<SPECIAL_585>",
592
+ "<SPECIAL_586>",
593
+ "<SPECIAL_587>",
594
+ "<SPECIAL_588>",
595
+ "<SPECIAL_589>",
596
+ "<SPECIAL_590>",
597
+ "<SPECIAL_591>",
598
+ "<SPECIAL_592>",
599
+ "<SPECIAL_593>",
600
+ "<SPECIAL_594>",
601
+ "<SPECIAL_595>",
602
+ "<SPECIAL_596>",
603
+ "<SPECIAL_597>",
604
+ "<SPECIAL_598>",
605
+ "<SPECIAL_599>",
606
+ "<SPECIAL_600>",
607
+ "<SPECIAL_601>",
608
+ "<SPECIAL_602>",
609
+ "<SPECIAL_603>",
610
+ "<SPECIAL_604>",
611
+ "<SPECIAL_605>",
612
+ "<SPECIAL_606>",
613
+ "<SPECIAL_607>",
614
+ "<SPECIAL_608>",
615
+ "<SPECIAL_609>",
616
+ "<SPECIAL_610>",
617
+ "<SPECIAL_611>",
618
+ "<SPECIAL_612>",
619
+ "<SPECIAL_613>",
620
+ "<SPECIAL_614>",
621
+ "<SPECIAL_615>",
622
+ "<SPECIAL_616>",
623
+ "<SPECIAL_617>",
624
+ "<SPECIAL_618>",
625
+ "<SPECIAL_619>",
626
+ "<SPECIAL_620>",
627
+ "<SPECIAL_621>",
628
+ "<SPECIAL_622>",
629
+ "<SPECIAL_623>",
630
+ "<SPECIAL_624>",
631
+ "<SPECIAL_625>",
632
+ "<SPECIAL_626>",
633
+ "<SPECIAL_627>",
634
+ "<SPECIAL_628>",
635
+ "<SPECIAL_629>",
636
+ "<SPECIAL_630>",
637
+ "<SPECIAL_631>",
638
+ "<SPECIAL_632>",
639
+ "<SPECIAL_633>",
640
+ "<SPECIAL_634>",
641
+ "<SPECIAL_635>",
642
+ "<SPECIAL_636>",
643
+ "<SPECIAL_637>",
644
+ "<SPECIAL_638>",
645
+ "<SPECIAL_639>",
646
+ "<SPECIAL_640>",
647
+ "<SPECIAL_641>",
648
+ "<SPECIAL_642>",
649
+ "<SPECIAL_643>",
650
+ "<SPECIAL_644>",
651
+ "<SPECIAL_645>",
652
+ "<SPECIAL_646>",
653
+ "<SPECIAL_647>",
654
+ "<SPECIAL_648>",
655
+ "<SPECIAL_649>",
656
+ "<SPECIAL_650>",
657
+ "<SPECIAL_651>",
658
+ "<SPECIAL_652>",
659
+ "<SPECIAL_653>",
660
+ "<SPECIAL_654>",
661
+ "<SPECIAL_655>",
662
+ "<SPECIAL_656>",
663
+ "<SPECIAL_657>",
664
+ "<SPECIAL_658>",
665
+ "<SPECIAL_659>",
666
+ "<SPECIAL_660>",
667
+ "<SPECIAL_661>",
668
+ "<SPECIAL_662>",
669
+ "<SPECIAL_663>",
670
+ "<SPECIAL_664>",
671
+ "<SPECIAL_665>",
672
+ "<SPECIAL_666>",
673
+ "<SPECIAL_667>",
674
+ "<SPECIAL_668>",
675
+ "<SPECIAL_669>",
676
+ "<SPECIAL_670>",
677
+ "<SPECIAL_671>",
678
+ "<SPECIAL_672>",
679
+ "<SPECIAL_673>",
680
+ "<SPECIAL_674>",
681
+ "<SPECIAL_675>",
682
+ "<SPECIAL_676>",
683
+ "<SPECIAL_677>",
684
+ "<SPECIAL_678>",
685
+ "<SPECIAL_679>",
686
+ "<SPECIAL_680>",
687
+ "<SPECIAL_681>",
688
+ "<SPECIAL_682>",
689
+ "<SPECIAL_683>",
690
+ "<SPECIAL_684>",
691
+ "<SPECIAL_685>",
692
+ "<SPECIAL_686>",
693
+ "<SPECIAL_687>",
694
+ "<SPECIAL_688>",
695
+ "<SPECIAL_689>",
696
+ "<SPECIAL_690>",
697
+ "<SPECIAL_691>",
698
+ "<SPECIAL_692>",
699
+ "<SPECIAL_693>",
700
+ "<SPECIAL_694>",
701
+ "<SPECIAL_695>",
702
+ "<SPECIAL_696>",
703
+ "<SPECIAL_697>",
704
+ "<SPECIAL_698>",
705
+ "<SPECIAL_699>",
706
+ "<SPECIAL_700>",
707
+ "<SPECIAL_701>",
708
+ "<SPECIAL_702>",
709
+ "<SPECIAL_703>",
710
+ "<SPECIAL_704>",
711
+ "<SPECIAL_705>",
712
+ "<SPECIAL_706>",
713
+ "<SPECIAL_707>",
714
+ "<SPECIAL_708>",
715
+ "<SPECIAL_709>",
716
+ "<SPECIAL_710>",
717
+ "<SPECIAL_711>",
718
+ "<SPECIAL_712>",
719
+ "<SPECIAL_713>",
720
+ "<SPECIAL_714>",
721
+ "<SPECIAL_715>",
722
+ "<SPECIAL_716>",
723
+ "<SPECIAL_717>",
724
+ "<SPECIAL_718>",
725
+ "<SPECIAL_719>",
726
+ "<SPECIAL_720>",
727
+ "<SPECIAL_721>",
728
+ "<SPECIAL_722>",
729
+ "<SPECIAL_723>",
730
+ "<SPECIAL_724>",
731
+ "<SPECIAL_725>",
732
+ "<SPECIAL_726>",
733
+ "<SPECIAL_727>",
734
+ "<SPECIAL_728>",
735
+ "<SPECIAL_729>",
736
+ "<SPECIAL_730>",
737
+ "<SPECIAL_731>",
738
+ "<SPECIAL_732>",
739
+ "<SPECIAL_733>",
740
+ "<SPECIAL_734>",
741
+ "<SPECIAL_735>",
742
+ "<SPECIAL_736>",
743
+ "<SPECIAL_737>",
744
+ "<SPECIAL_738>",
745
+ "<SPECIAL_739>",
746
+ "<SPECIAL_740>",
747
+ "<SPECIAL_741>",
748
+ "<SPECIAL_742>",
749
+ "<SPECIAL_743>",
750
+ "<SPECIAL_744>",
751
+ "<SPECIAL_745>",
752
+ "<SPECIAL_746>",
753
+ "<SPECIAL_747>",
754
+ "<SPECIAL_748>",
755
+ "<SPECIAL_749>",
756
+ "<SPECIAL_750>",
757
+ "<SPECIAL_751>",
758
+ "<SPECIAL_752>",
759
+ "<SPECIAL_753>",
760
+ "<SPECIAL_754>",
761
+ "<SPECIAL_755>",
762
+ "<SPECIAL_756>",
763
+ "<SPECIAL_757>",
764
+ "<SPECIAL_758>",
765
+ "<SPECIAL_759>",
766
+ "<SPECIAL_760>",
767
+ "<SPECIAL_761>",
768
+ "<SPECIAL_762>",
769
+ "<SPECIAL_763>",
770
+ "<SPECIAL_764>",
771
+ "<SPECIAL_765>",
772
+ "<SPECIAL_766>",
773
+ "<SPECIAL_767>",
774
+ "<SPECIAL_768>",
775
+ "<SPECIAL_769>",
776
+ "<SPECIAL_770>",
777
+ "<SPECIAL_771>",
778
+ "<SPECIAL_772>",
779
+ "<SPECIAL_773>",
780
+ "<SPECIAL_774>",
781
+ "<SPECIAL_775>",
782
+ "<SPECIAL_776>",
783
+ "<SPECIAL_777>",
784
+ "<SPECIAL_778>",
785
+ "<SPECIAL_779>",
786
+ "<SPECIAL_780>",
787
+ "<SPECIAL_781>",
788
+ "<SPECIAL_782>",
789
+ "<SPECIAL_783>",
790
+ "<SPECIAL_784>",
791
+ "<SPECIAL_785>",
792
+ "<SPECIAL_786>",
793
+ "<SPECIAL_787>",
794
+ "<SPECIAL_788>",
795
+ "<SPECIAL_789>",
796
+ "<SPECIAL_790>",
797
+ "<SPECIAL_791>",
798
+ "<SPECIAL_792>",
799
+ "<SPECIAL_793>",
800
+ "<SPECIAL_794>",
801
+ "<SPECIAL_795>",
802
+ "<SPECIAL_796>",
803
+ "<SPECIAL_797>",
804
+ "<SPECIAL_798>",
805
+ "<SPECIAL_799>",
806
+ "<SPECIAL_800>",
807
+ "<SPECIAL_801>",
808
+ "<SPECIAL_802>",
809
+ "<SPECIAL_803>",
810
+ "<SPECIAL_804>",
811
+ "<SPECIAL_805>",
812
+ "<SPECIAL_806>",
813
+ "<SPECIAL_807>",
814
+ "<SPECIAL_808>",
815
+ "<SPECIAL_809>",
816
+ "<SPECIAL_810>",
817
+ "<SPECIAL_811>",
818
+ "<SPECIAL_812>",
819
+ "<SPECIAL_813>",
820
+ "<SPECIAL_814>",
821
+ "<SPECIAL_815>",
822
+ "<SPECIAL_816>",
823
+ "<SPECIAL_817>",
824
+ "<SPECIAL_818>",
825
+ "<SPECIAL_819>",
826
+ "<SPECIAL_820>",
827
+ "<SPECIAL_821>",
828
+ "<SPECIAL_822>",
829
+ "<SPECIAL_823>",
830
+ "<SPECIAL_824>",
831
+ "<SPECIAL_825>",
832
+ "<SPECIAL_826>",
833
+ "<SPECIAL_827>",
834
+ "<SPECIAL_828>",
835
+ "<SPECIAL_829>",
836
+ "<SPECIAL_830>",
837
+ "<SPECIAL_831>",
838
+ "<SPECIAL_832>",
839
+ "<SPECIAL_833>",
840
+ "<SPECIAL_834>",
841
+ "<SPECIAL_835>",
842
+ "<SPECIAL_836>",
843
+ "<SPECIAL_837>",
844
+ "<SPECIAL_838>",
845
+ "<SPECIAL_839>",
846
+ "<SPECIAL_840>",
847
+ "<SPECIAL_841>",
848
+ "<SPECIAL_842>",
849
+ "<SPECIAL_843>",
850
+ "<SPECIAL_844>",
851
+ "<SPECIAL_845>",
852
+ "<SPECIAL_846>",
853
+ "<SPECIAL_847>",
854
+ "<SPECIAL_848>",
855
+ "<SPECIAL_849>",
856
+ "<SPECIAL_850>",
857
+ "<SPECIAL_851>",
858
+ "<SPECIAL_852>",
859
+ "<SPECIAL_853>",
860
+ "<SPECIAL_854>",
861
+ "<SPECIAL_855>",
862
+ "<SPECIAL_856>",
863
+ "<SPECIAL_857>",
864
+ "<SPECIAL_858>",
865
+ "<SPECIAL_859>",
866
+ "<SPECIAL_860>",
867
+ "<SPECIAL_861>",
868
+ "<SPECIAL_862>",
869
+ "<SPECIAL_863>",
870
+ "<SPECIAL_864>",
871
+ "<SPECIAL_865>",
872
+ "<SPECIAL_866>",
873
+ "<SPECIAL_867>",
874
+ "<SPECIAL_868>",
875
+ "<SPECIAL_869>",
876
+ "<SPECIAL_870>",
877
+ "<SPECIAL_871>",
878
+ "<SPECIAL_872>",
879
+ "<SPECIAL_873>",
880
+ "<SPECIAL_874>",
881
+ "<SPECIAL_875>",
882
+ "<SPECIAL_876>",
883
+ "<SPECIAL_877>",
884
+ "<SPECIAL_878>",
885
+ "<SPECIAL_879>",
886
+ "<SPECIAL_880>",
887
+ "<SPECIAL_881>",
888
+ "<SPECIAL_882>",
889
+ "<SPECIAL_883>",
890
+ "<SPECIAL_884>",
891
+ "<SPECIAL_885>",
892
+ "<SPECIAL_886>",
893
+ "<SPECIAL_887>",
894
+ "<SPECIAL_888>",
895
+ "<SPECIAL_889>",
896
+ "<SPECIAL_890>",
897
+ "<SPECIAL_891>",
898
+ "<SPECIAL_892>",
899
+ "<SPECIAL_893>",
900
+ "<SPECIAL_894>",
901
+ "<SPECIAL_895>",
902
+ "<SPECIAL_896>",
903
+ "<SPECIAL_897>",
904
+ "<SPECIAL_898>",
905
+ "<SPECIAL_899>",
906
+ "<SPECIAL_900>",
907
+ "<SPECIAL_901>",
908
+ "<SPECIAL_902>",
909
+ "<SPECIAL_903>",
910
+ "<SPECIAL_904>",
911
+ "<SPECIAL_905>",
912
+ "<SPECIAL_906>",
913
+ "<SPECIAL_907>",
914
+ "<SPECIAL_908>",
915
+ "<SPECIAL_909>",
916
+ "<SPECIAL_910>",
917
+ "<SPECIAL_911>",
918
+ "<SPECIAL_912>",
919
+ "<SPECIAL_913>",
920
+ "<SPECIAL_914>",
921
+ "<SPECIAL_915>",
922
+ "<SPECIAL_916>",
923
+ "<SPECIAL_917>",
924
+ "<SPECIAL_918>",
925
+ "<SPECIAL_919>",
926
+ "<SPECIAL_920>",
927
+ "<SPECIAL_921>",
928
+ "<SPECIAL_922>",
929
+ "<SPECIAL_923>",
930
+ "<SPECIAL_924>",
931
+ "<SPECIAL_925>",
932
+ "<SPECIAL_926>",
933
+ "<SPECIAL_927>",
934
+ "<SPECIAL_928>",
935
+ "<SPECIAL_929>",
936
+ "<SPECIAL_930>",
937
+ "<SPECIAL_931>",
938
+ "<SPECIAL_932>",
939
+ "<SPECIAL_933>",
940
+ "<SPECIAL_934>",
941
+ "<SPECIAL_935>",
942
+ "<SPECIAL_936>",
943
+ "<SPECIAL_937>",
944
+ "<SPECIAL_938>",
945
+ "<SPECIAL_939>",
946
+ "<SPECIAL_940>",
947
+ "<SPECIAL_941>",
948
+ "<SPECIAL_942>",
949
+ "<SPECIAL_943>",
950
+ "<SPECIAL_944>",
951
+ "<SPECIAL_945>",
952
+ "<SPECIAL_946>",
953
+ "<SPECIAL_947>",
954
+ "<SPECIAL_948>",
955
+ "<SPECIAL_949>",
956
+ "<SPECIAL_950>",
957
+ "<SPECIAL_951>",
958
+ "<SPECIAL_952>",
959
+ "<SPECIAL_953>",
960
+ "<SPECIAL_954>",
961
+ "<SPECIAL_955>",
962
+ "<SPECIAL_956>",
963
+ "<SPECIAL_957>",
964
+ "<SPECIAL_958>",
965
+ "<SPECIAL_959>",
966
+ "<SPECIAL_960>",
967
+ "<SPECIAL_961>",
968
+ "<SPECIAL_962>",
969
+ "<SPECIAL_963>",
970
+ "<SPECIAL_964>",
971
+ "<SPECIAL_965>",
972
+ "<SPECIAL_966>",
973
+ "<SPECIAL_967>",
974
+ "<SPECIAL_968>",
975
+ "<SPECIAL_969>",
976
+ "<SPECIAL_970>",
977
+ "<SPECIAL_971>",
978
+ "<SPECIAL_972>",
979
+ "<SPECIAL_973>",
980
+ "<SPECIAL_974>",
981
+ "<SPECIAL_975>",
982
+ "<SPECIAL_976>",
983
+ "<SPECIAL_977>",
984
+ "<SPECIAL_978>",
985
+ "<SPECIAL_979>",
986
+ "<SPECIAL_980>",
987
+ "<SPECIAL_981>",
988
+ "<SPECIAL_982>",
989
+ "<SPECIAL_983>",
990
+ "<SPECIAL_984>",
991
+ "<SPECIAL_985>",
992
+ "<SPECIAL_986>",
993
+ "<SPECIAL_987>",
994
+ "<SPECIAL_988>",
995
+ "<SPECIAL_989>",
996
+ "<SPECIAL_990>",
997
+ "<SPECIAL_991>",
998
+ "<SPECIAL_992>",
999
+ "<SPECIAL_993>",
1000
+ "<SPECIAL_994>",
1001
+ "<SPECIAL_995>",
1002
+ "<SPECIAL_996>",
1003
+ "<SPECIAL_997>",
1004
+ "<SPECIAL_998>",
1005
+ "<SPECIAL_999>"
1006
+ ],
1007
+ "is_local": true,
1008
+ "model_max_length": 1000000000000000019884624838656,
1009
+ "pad_token": "<pad>",
1010
+ "processor_class": "PixtralProcessor",
1011
+ "tokenizer_class": "TokenizersBackend",
1012
+ "unk_token": "<unk>"
1013
+ }
cpt_devstral_24B/checkpoints/checkpoint-600/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
cpt_devstral_24B/checkpoints/checkpoint-600/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f62526ec2433add7ac031c48b1f6ff360f1ade77275765112cbf7cf361d64ca5
3
+ size 5201
cpt_devstral_24B/checkpoints/checkpoint-686/README.md ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: /workspace/Models/Devstral-Small-2-24B-Instruct-2512
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:/workspace/Models/Devstral-Small-2-24B-Instruct-2512
7
+ - lora
8
+ - transformers
9
+ ---
10
+
11
+ # Model Card for Devstral-Small-2-24B-Instruct-2512 LoRA Adapter (checkpoint-686)
12
+
13
+ <!-- Provide a quick summary of what the model is/does. -->
14
+
15
+
16
+
17
+ ## Model Details
18
+
19
+ ### Model Description
20
+
21
+ <!-- Provide a longer summary of what this model is. -->
22
+
23
+
24
+
25
+ - **Developed by:** [More Information Needed]
26
+ - **Funded by [optional]:** [More Information Needed]
27
+ - **Shared by [optional]:** [More Information Needed]
28
+ - **Model type:** [More Information Needed]
29
+ - **Language(s) (NLP):** [More Information Needed]
30
+ - **License:** [More Information Needed]
31
+ - **Finetuned from model [optional]:** [More Information Needed]
32
+
33
+ ### Model Sources [optional]
34
+
35
+ <!-- Provide the basic links for the model. -->
36
+
37
+ - **Repository:** [More Information Needed]
38
+ - **Paper [optional]:** [More Information Needed]
39
+ - **Demo [optional]:** [More Information Needed]
40
+
41
+ ## Uses
42
+
43
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
44
+
45
+ ### Direct Use
46
+
47
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
48
+
49
+ [More Information Needed]
50
+
51
+ ### Downstream Use [optional]
52
+
53
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
54
+
55
+ [More Information Needed]
56
+
57
+ ### Out-of-Scope Use
58
+
59
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
60
+
61
+ [More Information Needed]
62
+
63
+ ## Bias, Risks, and Limitations
64
+
65
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
66
+
67
+ [More Information Needed]
68
+
69
+ ### Recommendations
70
+
71
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
72
+
73
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
74
+
75
+ ## How to Get Started with the Model
76
+
77
+ Use the code below to get started with the model.
78
+
79
+ [More Information Needed]
80
+
81
+ ## Training Details
82
+
83
+ ### Training Data
84
+
85
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
86
+
87
+ [More Information Needed]
88
+
89
+ ### Training Procedure
90
+
91
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
92
+
93
+ #### Preprocessing [optional]
94
+
95
+ [More Information Needed]
96
+
97
+
98
+ #### Training Hyperparameters
99
+
100
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
101
+
102
+ #### Speeds, Sizes, Times [optional]
103
+
104
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
105
+
106
+ [More Information Needed]
107
+
108
+ ## Evaluation
109
+
110
+ <!-- This section describes the evaluation protocols and provides the results. -->
111
+
112
+ ### Testing Data, Factors & Metrics
113
+
114
+ #### Testing Data
115
+
116
+ <!-- This should link to a Dataset Card if possible. -->
117
+
118
+ [More Information Needed]
119
+
120
+ #### Factors
121
+
122
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
123
+
124
+ [More Information Needed]
125
+
126
+ #### Metrics
127
+
128
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
129
+
130
+ [More Information Needed]
131
+
132
+ ### Results
133
+
134
+ [More Information Needed]
135
+
136
+ #### Summary
137
+
138
+
139
+
140
+ ## Model Examination [optional]
141
+
142
+ <!-- Relevant interpretability work for the model goes here -->
143
+
144
+ [More Information Needed]
145
+
146
+ ## Environmental Impact
147
+
148
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
149
+
150
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
151
+
152
+ - **Hardware Type:** [More Information Needed]
153
+ - **Hours used:** [More Information Needed]
154
+ - **Cloud Provider:** [More Information Needed]
155
+ - **Compute Region:** [More Information Needed]
156
+ - **Carbon Emitted:** [More Information Needed]
157
+
158
+ ## Technical Specifications [optional]
159
+
160
+ ### Model Architecture and Objective
161
+
162
+ [More Information Needed]
163
+
164
+ ### Compute Infrastructure
165
+
166
+ [More Information Needed]
167
+
168
+ #### Hardware
169
+
170
+ [More Information Needed]
171
+
172
+ #### Software
173
+
174
+ [More Information Needed]
175
+
176
+ ## Citation [optional]
177
+
178
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
179
+
180
+ **BibTeX:**
181
+
182
+ [More Information Needed]
183
+
184
+ **APA:**
185
+
186
+ [More Information Needed]
187
+
188
+ ## Glossary [optional]
189
+
190
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
191
+
192
+ [More Information Needed]
193
+
194
+ ## More Information [optional]
195
+
196
+ [More Information Needed]
197
+
198
+ ## Model Card Authors [optional]
199
+
200
+ [More Information Needed]
201
+
202
+ ## Model Card Contact
203
+
204
+ [More Information Needed]
205
+ ### Framework versions
206
+
207
+ - PEFT 0.18.0
cpt_devstral_24B/checkpoints/checkpoint-686/adapter_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "/workspace/Models/Devstral-Small-2-24B-Instruct-2512",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 128,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.0",
27
+ "qalora_group_size": 16,
28
+ "r": 64,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "k_proj",
33
+ "o_proj",
34
+ "q_proj",
35
+ "v_proj"
36
+ ],
37
+ "target_parameters": null,
38
+ "task_type": "CAUSAL_LM",
39
+ "trainable_token_indices": null,
40
+ "use_dora": false,
41
+ "use_qalora": false,
42
+ "use_rslora": false
43
+ }
cpt_devstral_24B/checkpoints/checkpoint-686/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03251a15616d79fc9469a2a39740e675ba2e6c64b84f82511ff46a4917a1a103
3
+ size 364983848
cpt_devstral_24B/checkpoints/checkpoint-686/chat_template.jinja ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {#- Default system message if no system prompt is passed. #}
2
+ {%- set default_system_message = '' %}
3
+
4
+ {#- Beginning-of-sequence token. #}
5
+ {{- bos_token }}
6
+
7
+ {#- Handle system prompt if it exists. #}
8
+ {#- System prompt supports text content or text chunks. #}
9
+ {%- if messages[0]['role'] == 'system' %}
10
+ {{- '[SYSTEM_PROMPT]' -}}
11
+ {%- if messages[0]['content'] is string %}
12
+ {{- messages[0]['content'] -}}
13
+ {%- else %}
14
+ {%- for block in messages[0]['content'] %}
15
+ {%- if block['type'] == 'text' %}
16
+ {{- block['text'] }}
17
+ {%- else %}
18
+ {{- raise_exception('Only text chunks are supported in system message contents.') }}
19
+ {%- endif %}
20
+ {%- endfor %}
21
+ {%- endif %}
22
+ {{- '[/SYSTEM_PROMPT]' -}}
23
+ {%- set loop_messages = messages[1:] %}
24
+ {%- else %}
25
+ {%- set loop_messages = messages %}
26
+ {%- if default_system_message != '' %}
27
+ {{- '[SYSTEM_PROMPT]' + default_system_message + '[/SYSTEM_PROMPT]' }}
28
+ {%- endif %}
29
+ {%- endif %}
30
+
31
+
32
+ {#- Tools definition #}
33
+ {%- set tools_definition = '' %}
34
+ {%- set has_tools = false %}
35
+ {%- if tools is defined and tools is not none and tools|length > 0 %}
36
+ {%- set has_tools = true %}
37
+ {%- set tools_definition = '[AVAILABLE_TOOLS]' + (tools| tojson) + '[/AVAILABLE_TOOLS]' %}
38
+ {{- tools_definition }}
39
+ {%- endif %}
40
+
41
+ {#- Checks for alternating user/assistant messages. #}
42
+ {%- set ns = namespace(index=0) %}
43
+ {%- for message in loop_messages %}
44
+ {%- if message.role == 'user' or (message.role == 'assistant' and (message.tool_calls is not defined or message.tool_calls is none or message.tool_calls | length == 0)) %}
45
+ {%- if (message['role'] == 'user') != (ns.index % 2 == 0) %}
46
+ {{- raise_exception('After the optional system message, conversation roles must alternate user and assistant roles except for tool calls and results.') }}
47
+ {%- endif %}
48
+ {%- set ns.index = ns.index + 1 %}
49
+ {%- endif %}
50
+ {%- endfor %}
51
+
52
+ {#- Handle conversation messages. #}
53
+ {%- for message in loop_messages %}
54
+
55
+ {#- User messages support text content or text and image chunks. #}
56
+ {%- if message['role'] == 'user' %}
57
+ {%- if message['content'] is string %}
58
+ {{- '[INST]' + message['content'] + '[/INST]' }}
59
+ {%- elif message['content'] | length > 0 %}
60
+ {{- '[INST]' }}
61
+ {%- if message['content'] | length == 2 %}
62
+ {%- set blocks = message['content'] | sort(attribute='type') %}
63
+ {%- else %}
64
+ {%- set blocks = message['content'] %}
65
+ {%- endif %}
66
+ {%- for block in blocks %}
67
+ {%- if block['type'] == 'text' %}
68
+ {{- block['text'] }}
69
+ {%- elif block['type'] in ['image', 'image_url'] %}
70
+ {{- '[IMG]' }}
71
+ {%- else %}
72
+ {{- raise_exception('Only text, image and image_url chunks are supported in user message content.') }}
73
+ {%- endif %}
74
+ {%- endfor %}
75
+ {{- '[/INST]' }}
76
+ {%- else %}
77
+ {{- raise_exception('User message must have a string or a list of chunks in content') }}
78
+ {%- endif %}
79
+
80
+ {#- Assistant messages support text content or text chunks only (non-text chunks raise), plus optional tool calls. #}
81
+ {%- elif message['role'] == 'assistant' %}
82
+ {%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}
83
+ {{- raise_exception('Assistant message must have a string or a list of chunks in content or a list of tool calls.') }}
84
+ {%- endif %}
85
+
86
+ {%- if message['content'] is string %}
87
+ {{- message['content'] }}
88
+ {%- elif message['content'] | length > 0 %}
89
+ {%- for block in message['content'] %}
90
+ {%- if block['type'] == 'text' %}
91
+ {{- block['text'] }}
92
+ {%- else %}
93
+ {{- raise_exception('Only text chunks are supported in assistant message contents.') }}
94
+ {%- endif %}
95
+ {%- endfor %}
96
+ {%- endif %}
97
+
98
+ {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 %}
99
+ {%- for tool in message['tool_calls'] %}
100
+ {%- set arguments = tool['function']['arguments'] %}
101
+ {%- if arguments is not string %}
102
+ {%- set arguments = arguments|tojson|safe %}
103
+ {%- elif arguments == '' %}
104
+ {%- set arguments = '{}' %}
105
+ {%- endif %}
106
+ {{- '[TOOL_CALLS]' + tool['function']['name'] + '[ARGS]' + arguments }}
107
+ {%- endfor %}
108
+ {%- endif %}
109
+
110
+ {#- End-of-sequence token after each assistant message. #}
111
+ {{- eos_token }}
112
+
113
+ {#- Tool messages only support text content. #}
114
+ {%- elif message['role'] == 'tool' %}
115
+ {{- '[TOOL_RESULTS]' + message['content']|string + '[/TOOL_RESULTS]' }}
116
+
117
+ {#- Raise exception for unsupported roles. #}
118
+ {%- else %}
119
+ {{- raise_exception('Only user, assistant and tool roles are supported, got ' + message['role'] + '.') }}
120
+ {%- endif %}
121
+ {%- endfor %}
cpt_devstral_24B/checkpoints/checkpoint-686/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03f935371d01b010e5f58a9120e2561936056ae567db9fe04fb52a1b63061363
3
+ size 160131559
cpt_devstral_24B/checkpoints/checkpoint-686/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e706874d7a72e865503ef7a4aaf06a0ded1324badabfab0b0223627edcf671a8
3
+ size 14645
cpt_devstral_24B/checkpoints/checkpoint-686/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac75df5c8a3fb8b97098c85f14d52dc911b665df45efe2b05fbc8192aba4e49f
3
+ size 1465
cpt_devstral_24B/checkpoints/checkpoint-686/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286acad9b0e27fce778ac429763536accf618ccb6ed72963b6f94685e531c5c7
3
+ size 17077402
cpt_devstral_24B/checkpoints/checkpoint-686/tokenizer_config.json ADDED
@@ -0,0 +1,1013 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "extra_special_tokens": [
6
+ "<unk>",
7
+ "<s>",
8
+ "</s>",
9
+ "[INST]",
10
+ "[/INST]",
11
+ "[AVAILABLE_TOOLS]",
12
+ "[/AVAILABLE_TOOLS]",
13
+ "[TOOL_RESULTS]",
14
+ "[/TOOL_RESULTS]",
15
+ "[TOOL_CALLS]",
16
+ "[IMG]",
17
+ "<pad>",
18
+ "[IMG_BREAK]",
19
+ "[IMG_END]",
20
+ "[PREFIX]",
21
+ "[MIDDLE]",
22
+ "[SUFFIX]",
23
+ "[SYSTEM_PROMPT]",
24
+ "[/SYSTEM_PROMPT]",
25
+ "[TOOL_CONTENT]",
26
+ "<SPECIAL_20>",
27
+ "<SPECIAL_21>",
28
+ "<SPECIAL_22>",
29
+ "<SPECIAL_23>",
30
+ "[AUDIO]",
31
+ "[BEGIN_AUDIO]",
32
+ "<SPECIAL_26>",
33
+ "<SPECIAL_27>",
34
+ "<SPECIAL_28>",
35
+ "<SPECIAL_29>",
36
+ "<SPECIAL_30>",
37
+ "<SPECIAL_31>",
38
+ "[ARGS]",
39
+ "[CALL_ID]",
40
+ "[THINK]",
41
+ "[/THINK]",
42
+ "<SPECIAL_36>",
43
+ "<SPECIAL_37>",
44
+ "<SPECIAL_38>",
45
+ "<SPECIAL_39>",
46
+ "<SPECIAL_40>",
47
+ "<SPECIAL_41>",
48
+ "<SPECIAL_42>",
49
+ "<SPECIAL_43>",
50
+ "<SPECIAL_44>",
51
+ "<SPECIAL_45>",
52
+ "<SPECIAL_46>",
53
+ "<SPECIAL_47>",
54
+ "<SPECIAL_48>",
55
+ "<SPECIAL_49>",
56
+ "<SPECIAL_50>",
57
+ "<SPECIAL_51>",
58
+ "<SPECIAL_52>",
59
+ "<SPECIAL_53>",
60
+ "<SPECIAL_54>",
61
+ "<SPECIAL_55>",
62
+ "<SPECIAL_56>",
63
+ "<SPECIAL_57>",
64
+ "<SPECIAL_58>",
65
+ "<SPECIAL_59>",
66
+ "<SPECIAL_60>",
67
+ "<SPECIAL_61>",
68
+ "<SPECIAL_62>",
69
+ "<SPECIAL_63>",
70
+ "<SPECIAL_64>",
71
+ "<SPECIAL_65>",
72
+ "<SPECIAL_66>",
73
+ "<SPECIAL_67>",
74
+ "<SPECIAL_68>",
75
+ "<SPECIAL_69>",
76
+ "<SPECIAL_70>",
77
+ "<SPECIAL_71>",
78
+ "<SPECIAL_72>",
79
+ "<SPECIAL_73>",
80
+ "<SPECIAL_74>",
81
+ "<SPECIAL_75>",
82
+ "<SPECIAL_76>",
83
+ "<SPECIAL_77>",
84
+ "<SPECIAL_78>",
85
+ "<SPECIAL_79>",
86
+ "<SPECIAL_80>",
87
+ "<SPECIAL_81>",
88
+ "<SPECIAL_82>",
89
+ "<SPECIAL_83>",
90
+ "<SPECIAL_84>",
91
+ "<SPECIAL_85>",
92
+ "<SPECIAL_86>",
93
+ "<SPECIAL_87>",
94
+ "<SPECIAL_88>",
95
+ "<SPECIAL_89>",
96
+ "<SPECIAL_90>",
97
+ "<SPECIAL_91>",
98
+ "<SPECIAL_92>",
99
+ "<SPECIAL_93>",
100
+ "<SPECIAL_94>",
101
+ "<SPECIAL_95>",
102
+ "<SPECIAL_96>",
103
+ "<SPECIAL_97>",
104
+ "<SPECIAL_98>",
105
+ "<SPECIAL_99>",
106
+ "<SPECIAL_100>",
107
+ "<SPECIAL_101>",
108
+ "<SPECIAL_102>",
109
+ "<SPECIAL_103>",
110
+ "<SPECIAL_104>",
111
+ "<SPECIAL_105>",
112
+ "<SPECIAL_106>",
113
+ "<SPECIAL_107>",
114
+ "<SPECIAL_108>",
115
+ "<SPECIAL_109>",
116
+ "<SPECIAL_110>",
117
+ "<SPECIAL_111>",
118
+ "<SPECIAL_112>",
119
+ "<SPECIAL_113>",
120
+ "<SPECIAL_114>",
121
+ "<SPECIAL_115>",
122
+ "<SPECIAL_116>",
123
+ "<SPECIAL_117>",
124
+ "<SPECIAL_118>",
125
+ "<SPECIAL_119>",
126
+ "<SPECIAL_120>",
127
+ "<SPECIAL_121>",
128
+ "<SPECIAL_122>",
129
+ "<SPECIAL_123>",
130
+ "<SPECIAL_124>",
131
+ "<SPECIAL_125>",
132
+ "<SPECIAL_126>",
133
+ "<SPECIAL_127>",
134
+ "<SPECIAL_128>",
135
+ "<SPECIAL_129>",
136
+ "<SPECIAL_130>",
137
+ "<SPECIAL_131>",
138
+ "<SPECIAL_132>",
139
+ "<SPECIAL_133>",
140
+ "<SPECIAL_134>",
141
+ "<SPECIAL_135>",
142
+ "<SPECIAL_136>",
143
+ "<SPECIAL_137>",
144
+ "<SPECIAL_138>",
145
+ "<SPECIAL_139>",
146
+ "<SPECIAL_140>",
147
+ "<SPECIAL_141>",
148
+ "<SPECIAL_142>",
149
+ "<SPECIAL_143>",
150
+ "<SPECIAL_144>",
151
+ "<SPECIAL_145>",
152
+ "<SPECIAL_146>",
153
+ "<SPECIAL_147>",
154
+ "<SPECIAL_148>",
155
+ "<SPECIAL_149>",
156
+ "<SPECIAL_150>",
157
+ "<SPECIAL_151>",
158
+ "<SPECIAL_152>",
159
+ "<SPECIAL_153>",
160
+ "<SPECIAL_154>",
161
+ "<SPECIAL_155>",
162
+ "<SPECIAL_156>",
163
+ "<SPECIAL_157>",
164
+ "<SPECIAL_158>",
165
+ "<SPECIAL_159>",
166
+ "<SPECIAL_160>",
167
+ "<SPECIAL_161>",
168
+ "<SPECIAL_162>",
169
+ "<SPECIAL_163>",
170
+ "<SPECIAL_164>",
171
+ "<SPECIAL_165>",
172
+ "<SPECIAL_166>",
173
+ "<SPECIAL_167>",
174
+ "<SPECIAL_168>",
175
+ "<SPECIAL_169>",
176
+ "<SPECIAL_170>",
177
+ "<SPECIAL_171>",
178
+ "<SPECIAL_172>",
179
+ "<SPECIAL_173>",
180
+ "<SPECIAL_174>",
181
+ "<SPECIAL_175>",
182
+ "<SPECIAL_176>",
183
+ "<SPECIAL_177>",
184
+ "<SPECIAL_178>",
185
+ "<SPECIAL_179>",
186
+ "<SPECIAL_180>",
187
+ "<SPECIAL_181>",
188
+ "<SPECIAL_182>",
189
+ "<SPECIAL_183>",
190
+ "<SPECIAL_184>",
191
+ "<SPECIAL_185>",
192
+ "<SPECIAL_186>",
193
+ "<SPECIAL_187>",
194
+ "<SPECIAL_188>",
195
+ "<SPECIAL_189>",
196
+ "<SPECIAL_190>",
197
+ "<SPECIAL_191>",
198
+ "<SPECIAL_192>",
199
+ "<SPECIAL_193>",
200
+ "<SPECIAL_194>",
201
+ "<SPECIAL_195>",
202
+ "<SPECIAL_196>",
203
+ "<SPECIAL_197>",
204
+ "<SPECIAL_198>",
205
+ "<SPECIAL_199>",
206
+ "<SPECIAL_200>",
207
+ "<SPECIAL_201>",
208
+ "<SPECIAL_202>",
209
+ "<SPECIAL_203>",
210
+ "<SPECIAL_204>",
211
+ "<SPECIAL_205>",
212
+ "<SPECIAL_206>",
213
+ "<SPECIAL_207>",
214
+ "<SPECIAL_208>",
215
+ "<SPECIAL_209>",
216
+ "<SPECIAL_210>",
217
+ "<SPECIAL_211>",
218
+ "<SPECIAL_212>",
219
+ "<SPECIAL_213>",
220
+ "<SPECIAL_214>",
221
+ "<SPECIAL_215>",
222
+ "<SPECIAL_216>",
223
+ "<SPECIAL_217>",
224
+ "<SPECIAL_218>",
225
+ "<SPECIAL_219>",
226
+ "<SPECIAL_220>",
227
+ "<SPECIAL_221>",
228
+ "<SPECIAL_222>",
229
+ "<SPECIAL_223>",
230
+ "<SPECIAL_224>",
231
+ "<SPECIAL_225>",
232
+ "<SPECIAL_226>",
233
+ "<SPECIAL_227>",
234
+ "<SPECIAL_228>",
235
+ "<SPECIAL_229>",
236
+ "<SPECIAL_230>",
237
+ "<SPECIAL_231>",
238
+ "<SPECIAL_232>",
239
+ "<SPECIAL_233>",
240
+ "<SPECIAL_234>",
241
+ "<SPECIAL_235>",
242
+ "<SPECIAL_236>",
243
+ "<SPECIAL_237>",
244
+ "<SPECIAL_238>",
245
+ "<SPECIAL_239>",
246
+ "<SPECIAL_240>",
247
+ "<SPECIAL_241>",
248
+ "<SPECIAL_242>",
249
+ "<SPECIAL_243>",
250
+ "<SPECIAL_244>",
251
+ "<SPECIAL_245>",
252
+ "<SPECIAL_246>",
253
+ "<SPECIAL_247>",
254
+ "<SPECIAL_248>",
255
+ "<SPECIAL_249>",
256
+ "<SPECIAL_250>",
257
+ "<SPECIAL_251>",
258
+ "<SPECIAL_252>",
259
+ "<SPECIAL_253>",
260
+ "<SPECIAL_254>",
261
+ "<SPECIAL_255>",
262
+ "<SPECIAL_256>",
263
+ "<SPECIAL_257>",
264
+ "<SPECIAL_258>",
265
+ "<SPECIAL_259>",
266
+ "<SPECIAL_260>",
267
+ "<SPECIAL_261>",
268
+ "<SPECIAL_262>",
269
+ "<SPECIAL_263>",
270
+ "<SPECIAL_264>",
271
+ "<SPECIAL_265>",
272
+ "<SPECIAL_266>",
273
+ "<SPECIAL_267>",
274
+ "<SPECIAL_268>",
275
+ "<SPECIAL_269>",
276
+ "<SPECIAL_270>",
277
+ "<SPECIAL_271>",
278
+ "<SPECIAL_272>",
279
+ "<SPECIAL_273>",
280
+ "<SPECIAL_274>",
281
+ "<SPECIAL_275>",
282
+ "<SPECIAL_276>",
283
+ "<SPECIAL_277>",
284
+ "<SPECIAL_278>",
285
+ "<SPECIAL_279>",
286
+ "<SPECIAL_280>",
287
+ "<SPECIAL_281>",
288
+ "<SPECIAL_282>",
289
+ "<SPECIAL_283>",
290
+ "<SPECIAL_284>",
291
+ "<SPECIAL_285>",
292
+ "<SPECIAL_286>",
293
+ "<SPECIAL_287>",
294
+ "<SPECIAL_288>",
295
+ "<SPECIAL_289>",
296
+ "<SPECIAL_290>",
297
+ "<SPECIAL_291>",
298
+ "<SPECIAL_292>",
299
+ "<SPECIAL_293>",
300
+ "<SPECIAL_294>",
301
+ "<SPECIAL_295>",
302
+ "<SPECIAL_296>",
303
+ "<SPECIAL_297>",
304
+ "<SPECIAL_298>",
305
+ "<SPECIAL_299>",
306
+ "<SPECIAL_300>",
307
+ "<SPECIAL_301>",
308
+ "<SPECIAL_302>",
309
+ "<SPECIAL_303>",
310
+ "<SPECIAL_304>",
311
+ "<SPECIAL_305>",
312
+ "<SPECIAL_306>",
313
+ "<SPECIAL_307>",
314
+ "<SPECIAL_308>",
315
+ "<SPECIAL_309>",
316
+ "<SPECIAL_310>",
317
+ "<SPECIAL_311>",
318
+ "<SPECIAL_312>",
319
+ "<SPECIAL_313>",
320
+ "<SPECIAL_314>",
321
+ "<SPECIAL_315>",
322
+ "<SPECIAL_316>",
323
+ "<SPECIAL_317>",
324
+ "<SPECIAL_318>",
325
+ "<SPECIAL_319>",
326
+ "<SPECIAL_320>",
327
+ "<SPECIAL_321>",
328
+ "<SPECIAL_322>",
329
+ "<SPECIAL_323>",
330
+ "<SPECIAL_324>",
331
+ "<SPECIAL_325>",
332
+ "<SPECIAL_326>",
333
+ "<SPECIAL_327>",
334
+ "<SPECIAL_328>",
335
+ "<SPECIAL_329>",
336
+ "<SPECIAL_330>",
337
+ "<SPECIAL_331>",
338
+ "<SPECIAL_332>",
339
+ "<SPECIAL_333>",
340
+ "<SPECIAL_334>",
341
+ "<SPECIAL_335>",
342
+ "<SPECIAL_336>",
343
+ "<SPECIAL_337>",
344
+ "<SPECIAL_338>",
345
+ "<SPECIAL_339>",
346
+ "<SPECIAL_340>",
347
+ "<SPECIAL_341>",
348
+ "<SPECIAL_342>",
349
+ "<SPECIAL_343>",
350
+ "<SPECIAL_344>",
351
+ "<SPECIAL_345>",
352
+ "<SPECIAL_346>",
353
+ "<SPECIAL_347>",
354
+ "<SPECIAL_348>",
355
+ "<SPECIAL_349>",
356
+ "<SPECIAL_350>",
357
+ "<SPECIAL_351>",
358
+ "<SPECIAL_352>",
359
+ "<SPECIAL_353>",
360
+ "<SPECIAL_354>",
361
+ "<SPECIAL_355>",
362
+ "<SPECIAL_356>",
363
+ "<SPECIAL_357>",
364
+ "<SPECIAL_358>",
365
+ "<SPECIAL_359>",
366
+ "<SPECIAL_360>",
367
+ "<SPECIAL_361>",
368
+ "<SPECIAL_362>",
369
+ "<SPECIAL_363>",
370
+ "<SPECIAL_364>",
371
+ "<SPECIAL_365>",
372
+ "<SPECIAL_366>",
373
+ "<SPECIAL_367>",
374
+ "<SPECIAL_368>",
375
+ "<SPECIAL_369>",
376
+ "<SPECIAL_370>",
377
+ "<SPECIAL_371>",
378
+ "<SPECIAL_372>",
379
+ "<SPECIAL_373>",
380
+ "<SPECIAL_374>",
381
+ "<SPECIAL_375>",
382
+ "<SPECIAL_376>",
383
+ "<SPECIAL_377>",
384
+ "<SPECIAL_378>",
385
+ "<SPECIAL_379>",
386
+ "<SPECIAL_380>",
387
+ "<SPECIAL_381>",
388
+ "<SPECIAL_382>",
389
+ "<SPECIAL_383>",
390
+ "<SPECIAL_384>",
391
+ "<SPECIAL_385>",
392
+ "<SPECIAL_386>",
393
+ "<SPECIAL_387>",
394
+ "<SPECIAL_388>",
395
+ "<SPECIAL_389>",
396
+ "<SPECIAL_390>",
397
+ "<SPECIAL_391>",
398
+ "<SPECIAL_392>",
399
+ "<SPECIAL_393>",
400
+ "<SPECIAL_394>",
401
+ "<SPECIAL_395>",
402
+ "<SPECIAL_396>",
403
+ "<SPECIAL_397>",
404
+ "<SPECIAL_398>",
405
+ "<SPECIAL_399>",
406
+ "<SPECIAL_400>",
407
+ "<SPECIAL_401>",
408
+ "<SPECIAL_402>",
409
+ "<SPECIAL_403>",
410
+ "<SPECIAL_404>",
411
+ "<SPECIAL_405>",
412
+ "<SPECIAL_406>",
413
+ "<SPECIAL_407>",
414
+ "<SPECIAL_408>",
415
+ "<SPECIAL_409>",
416
+ "<SPECIAL_410>",
417
+ "<SPECIAL_411>",
418
+ "<SPECIAL_412>",
419
+ "<SPECIAL_413>",
420
+ "<SPECIAL_414>",
421
+ "<SPECIAL_415>",
422
+ "<SPECIAL_416>",
423
+ "<SPECIAL_417>",
424
+ "<SPECIAL_418>",
425
+ "<SPECIAL_419>",
426
+ "<SPECIAL_420>",
427
+ "<SPECIAL_421>",
428
+ "<SPECIAL_422>",
429
+ "<SPECIAL_423>",
430
+ "<SPECIAL_424>",
431
+ "<SPECIAL_425>",
432
+ "<SPECIAL_426>",
433
+ "<SPECIAL_427>",
434
+ "<SPECIAL_428>",
435
+ "<SPECIAL_429>",
436
+ "<SPECIAL_430>",
437
+ "<SPECIAL_431>",
438
+ "<SPECIAL_432>",
439
+ "<SPECIAL_433>",
440
+ "<SPECIAL_434>",
441
+ "<SPECIAL_435>",
442
+ "<SPECIAL_436>",
443
+ "<SPECIAL_437>",
444
+ "<SPECIAL_438>",
445
+ "<SPECIAL_439>",
446
+ "<SPECIAL_440>",
447
+ "<SPECIAL_441>",
448
+ "<SPECIAL_442>",
449
+ "<SPECIAL_443>",
450
+ "<SPECIAL_444>",
451
+ "<SPECIAL_445>",
452
+ "<SPECIAL_446>",
453
+ "<SPECIAL_447>",
454
+ "<SPECIAL_448>",
455
+ "<SPECIAL_449>",
456
+ "<SPECIAL_450>",
457
+ "<SPECIAL_451>",
458
+ "<SPECIAL_452>",
459
+ "<SPECIAL_453>",
460
+ "<SPECIAL_454>",
461
+ "<SPECIAL_455>",
462
+ "<SPECIAL_456>",
463
+ "<SPECIAL_457>",
464
+ "<SPECIAL_458>",
465
+ "<SPECIAL_459>",
466
+ "<SPECIAL_460>",
467
+ "<SPECIAL_461>",
468
+ "<SPECIAL_462>",
469
+ "<SPECIAL_463>",
470
+ "<SPECIAL_464>",
471
+ "<SPECIAL_465>",
472
+ "<SPECIAL_466>",
473
+ "<SPECIAL_467>",
474
+ "<SPECIAL_468>",
475
+ "<SPECIAL_469>",
476
+ "<SPECIAL_470>",
477
+ "<SPECIAL_471>",
478
+ "<SPECIAL_472>",
479
+ "<SPECIAL_473>",
480
+ "<SPECIAL_474>",
481
+ "<SPECIAL_475>",
482
+ "<SPECIAL_476>",
483
+ "<SPECIAL_477>",
484
+ "<SPECIAL_478>",
485
+ "<SPECIAL_479>",
486
+ "<SPECIAL_480>",
487
+ "<SPECIAL_481>",
488
+ "<SPECIAL_482>",
489
+ "<SPECIAL_483>",
490
+ "<SPECIAL_484>",
491
+ "<SPECIAL_485>",
492
+ "<SPECIAL_486>",
493
+ "<SPECIAL_487>",
494
+ "<SPECIAL_488>",
495
+ "<SPECIAL_489>",
496
+ "<SPECIAL_490>",
497
+ "<SPECIAL_491>",
498
+ "<SPECIAL_492>",
499
+ "<SPECIAL_493>",
500
+ "<SPECIAL_494>",
501
+ "<SPECIAL_495>",
502
+ "<SPECIAL_496>",
503
+ "<SPECIAL_497>",
504
+ "<SPECIAL_498>",
505
+ "<SPECIAL_499>",
506
+ "<SPECIAL_500>",
507
+ "<SPECIAL_501>",
508
+ "<SPECIAL_502>",
509
+ "<SPECIAL_503>",
510
+ "<SPECIAL_504>",
511
+ "<SPECIAL_505>",
512
+ "<SPECIAL_506>",
513
+ "<SPECIAL_507>",
514
+ "<SPECIAL_508>",
515
+ "<SPECIAL_509>",
516
+ "<SPECIAL_510>",
517
+ "<SPECIAL_511>",
518
+ "<SPECIAL_512>",
519
+ "<SPECIAL_513>",
520
+ "<SPECIAL_514>",
521
+ "<SPECIAL_515>",
522
+ "<SPECIAL_516>",
523
+ "<SPECIAL_517>",
524
+ "<SPECIAL_518>",
525
+ "<SPECIAL_519>",
526
+ "<SPECIAL_520>",
527
+ "<SPECIAL_521>",
528
+ "<SPECIAL_522>",
529
+ "<SPECIAL_523>",
530
+ "<SPECIAL_524>",
531
+ "<SPECIAL_525>",
532
+ "<SPECIAL_526>",
533
+ "<SPECIAL_527>",
534
+ "<SPECIAL_528>",
535
+ "<SPECIAL_529>",
536
+ "<SPECIAL_530>",
537
+ "<SPECIAL_531>",
538
+ "<SPECIAL_532>",
539
+ "<SPECIAL_533>",
540
+ "<SPECIAL_534>",
541
+ "<SPECIAL_535>",
542
+ "<SPECIAL_536>",
543
+ "<SPECIAL_537>",
544
+ "<SPECIAL_538>",
545
+ "<SPECIAL_539>",
546
+ "<SPECIAL_540>",
547
+ "<SPECIAL_541>",
548
+ "<SPECIAL_542>",
549
+ "<SPECIAL_543>",
550
+ "<SPECIAL_544>",
551
+ "<SPECIAL_545>",
552
+ "<SPECIAL_546>",
553
+ "<SPECIAL_547>",
554
+ "<SPECIAL_548>",
555
+ "<SPECIAL_549>",
556
+ "<SPECIAL_550>",
557
+ "<SPECIAL_551>",
558
+ "<SPECIAL_552>",
559
+ "<SPECIAL_553>",
560
+ "<SPECIAL_554>",
561
+ "<SPECIAL_555>",
562
+ "<SPECIAL_556>",
563
+ "<SPECIAL_557>",
564
+ "<SPECIAL_558>",
565
+ "<SPECIAL_559>",
566
+ "<SPECIAL_560>",
567
+ "<SPECIAL_561>",
568
+ "<SPECIAL_562>",
569
+ "<SPECIAL_563>",
570
+ "<SPECIAL_564>",
571
+ "<SPECIAL_565>",
572
+ "<SPECIAL_566>",
573
+ "<SPECIAL_567>",
574
+ "<SPECIAL_568>",
575
+ "<SPECIAL_569>",
576
+ "<SPECIAL_570>",
577
+ "<SPECIAL_571>",
578
+ "<SPECIAL_572>",
579
+ "<SPECIAL_573>",
580
+ "<SPECIAL_574>",
581
+ "<SPECIAL_575>",
582
+ "<SPECIAL_576>",
583
+ "<SPECIAL_577>",
584
+ "<SPECIAL_578>",
585
+ "<SPECIAL_579>",
586
+ "<SPECIAL_580>",
587
+ "<SPECIAL_581>",
588
+ "<SPECIAL_582>",
589
+ "<SPECIAL_583>",
590
+ "<SPECIAL_584>",
591
+ "<SPECIAL_585>",
592
+ "<SPECIAL_586>",
593
+ "<SPECIAL_587>",
594
+ "<SPECIAL_588>",
595
+ "<SPECIAL_589>",
596
+ "<SPECIAL_590>",
597
+ "<SPECIAL_591>",
598
+ "<SPECIAL_592>",
599
+ "<SPECIAL_593>",
600
+ "<SPECIAL_594>",
601
+ "<SPECIAL_595>",
602
+ "<SPECIAL_596>",
603
+ "<SPECIAL_597>",
604
+ "<SPECIAL_598>",
605
+ "<SPECIAL_599>",
606
+ "<SPECIAL_600>",
607
+ "<SPECIAL_601>",
608
+ "<SPECIAL_602>",
609
+ "<SPECIAL_603>",
610
+ "<SPECIAL_604>",
611
+ "<SPECIAL_605>",
612
+ "<SPECIAL_606>",
613
+ "<SPECIAL_607>",
614
+ "<SPECIAL_608>",
615
+ "<SPECIAL_609>",
616
+ "<SPECIAL_610>",
617
+ "<SPECIAL_611>",
618
+ "<SPECIAL_612>",
619
+ "<SPECIAL_613>",
620
+ "<SPECIAL_614>",
621
+ "<SPECIAL_615>",
622
+ "<SPECIAL_616>",
623
+ "<SPECIAL_617>",
624
+ "<SPECIAL_618>",
625
+ "<SPECIAL_619>",
626
+ "<SPECIAL_620>",
627
+ "<SPECIAL_621>",
628
+ "<SPECIAL_622>",
629
+ "<SPECIAL_623>",
630
+ "<SPECIAL_624>",
631
+ "<SPECIAL_625>",
632
+ "<SPECIAL_626>",
633
+ "<SPECIAL_627>",
634
+ "<SPECIAL_628>",
635
+ "<SPECIAL_629>",
636
+ "<SPECIAL_630>",
637
+ "<SPECIAL_631>",
638
+ "<SPECIAL_632>",
639
+ "<SPECIAL_633>",
640
+ "<SPECIAL_634>",
641
+ "<SPECIAL_635>",
642
+ "<SPECIAL_636>",
643
+ "<SPECIAL_637>",
644
+ "<SPECIAL_638>",
645
+ "<SPECIAL_639>",
646
+ "<SPECIAL_640>",
647
+ "<SPECIAL_641>",
648
+ "<SPECIAL_642>",
649
+ "<SPECIAL_643>",
650
+ "<SPECIAL_644>",
651
+ "<SPECIAL_645>",
652
+ "<SPECIAL_646>",
653
+ "<SPECIAL_647>",
654
+ "<SPECIAL_648>",
655
+ "<SPECIAL_649>",
656
+ "<SPECIAL_650>",
657
+ "<SPECIAL_651>",
658
+ "<SPECIAL_652>",
659
+ "<SPECIAL_653>",
660
+ "<SPECIAL_654>",
661
+ "<SPECIAL_655>",
662
+ "<SPECIAL_656>",
663
+ "<SPECIAL_657>",
664
+ "<SPECIAL_658>",
665
+ "<SPECIAL_659>",
666
+ "<SPECIAL_660>",
667
+ "<SPECIAL_661>",
668
+ "<SPECIAL_662>",
669
+ "<SPECIAL_663>",
670
+ "<SPECIAL_664>",
671
+ "<SPECIAL_665>",
672
+ "<SPECIAL_666>",
673
+ "<SPECIAL_667>",
674
+ "<SPECIAL_668>",
675
+ "<SPECIAL_669>",
676
+ "<SPECIAL_670>",
677
+ "<SPECIAL_671>",
678
+ "<SPECIAL_672>",
679
+ "<SPECIAL_673>",
680
+ "<SPECIAL_674>",
681
+ "<SPECIAL_675>",
682
+ "<SPECIAL_676>",
683
+ "<SPECIAL_677>",
684
+ "<SPECIAL_678>",
685
+ "<SPECIAL_679>",
686
+ "<SPECIAL_680>",
687
+ "<SPECIAL_681>",
688
+ "<SPECIAL_682>",
689
+ "<SPECIAL_683>",
690
+ "<SPECIAL_684>",
691
+ "<SPECIAL_685>",
692
+ "<SPECIAL_686>",
693
+ "<SPECIAL_687>",
694
+ "<SPECIAL_688>",
695
+ "<SPECIAL_689>",
696
+ "<SPECIAL_690>",
697
+ "<SPECIAL_691>",
698
+ "<SPECIAL_692>",
699
+ "<SPECIAL_693>",
700
+ "<SPECIAL_694>",
701
+ "<SPECIAL_695>",
702
+ "<SPECIAL_696>",
703
+ "<SPECIAL_697>",
704
+ "<SPECIAL_698>",
705
+ "<SPECIAL_699>",
706
+ "<SPECIAL_700>",
707
+ "<SPECIAL_701>",
708
+ "<SPECIAL_702>",
709
+ "<SPECIAL_703>",
710
+ "<SPECIAL_704>",
711
+ "<SPECIAL_705>",
712
+ "<SPECIAL_706>",
713
+ "<SPECIAL_707>",
714
+ "<SPECIAL_708>",
715
+ "<SPECIAL_709>",
716
+ "<SPECIAL_710>",
717
+ "<SPECIAL_711>",
718
+ "<SPECIAL_712>",
719
+ "<SPECIAL_713>",
720
+ "<SPECIAL_714>",
721
+ "<SPECIAL_715>",
722
+ "<SPECIAL_716>",
723
+ "<SPECIAL_717>",
724
+ "<SPECIAL_718>",
725
+ "<SPECIAL_719>",
726
+ "<SPECIAL_720>",
727
+ "<SPECIAL_721>",
728
+ "<SPECIAL_722>",
729
+ "<SPECIAL_723>",
730
+ "<SPECIAL_724>",
731
+ "<SPECIAL_725>",
732
+ "<SPECIAL_726>",
733
+ "<SPECIAL_727>",
734
+ "<SPECIAL_728>",
735
+ "<SPECIAL_729>",
736
+ "<SPECIAL_730>",
737
+ "<SPECIAL_731>",
738
+ "<SPECIAL_732>",
739
+ "<SPECIAL_733>",
740
+ "<SPECIAL_734>",
741
+ "<SPECIAL_735>",
742
+ "<SPECIAL_736>",
743
+ "<SPECIAL_737>",
744
+ "<SPECIAL_738>",
745
+ "<SPECIAL_739>",
746
+ "<SPECIAL_740>",
747
+ "<SPECIAL_741>",
748
+ "<SPECIAL_742>",
749
+ "<SPECIAL_743>",
750
+ "<SPECIAL_744>",
751
+ "<SPECIAL_745>",
752
+ "<SPECIAL_746>",
753
+ "<SPECIAL_747>",
754
+ "<SPECIAL_748>",
755
+ "<SPECIAL_749>",
756
+ "<SPECIAL_750>",
757
+ "<SPECIAL_751>",
758
+ "<SPECIAL_752>",
759
+ "<SPECIAL_753>",
760
+ "<SPECIAL_754>",
761
+ "<SPECIAL_755>",
762
+ "<SPECIAL_756>",
763
+ "<SPECIAL_757>",
764
+ "<SPECIAL_758>",
765
+ "<SPECIAL_759>",
766
+ "<SPECIAL_760>",
767
+ "<SPECIAL_761>",
768
+ "<SPECIAL_762>",
769
+ "<SPECIAL_763>",
770
+ "<SPECIAL_764>",
771
+ "<SPECIAL_765>",
772
+ "<SPECIAL_766>",
773
+ "<SPECIAL_767>",
774
+ "<SPECIAL_768>",
775
+ "<SPECIAL_769>",
776
+ "<SPECIAL_770>",
777
+ "<SPECIAL_771>",
778
+ "<SPECIAL_772>",
779
+ "<SPECIAL_773>",
780
+ "<SPECIAL_774>",
781
+ "<SPECIAL_775>",
782
+ "<SPECIAL_776>",
783
+ "<SPECIAL_777>",
784
+ "<SPECIAL_778>",
785
+ "<SPECIAL_779>",
786
+ "<SPECIAL_780>",
787
+ "<SPECIAL_781>",
788
+ "<SPECIAL_782>",
789
+ "<SPECIAL_783>",
790
+ "<SPECIAL_784>",
791
+ "<SPECIAL_785>",
792
+ "<SPECIAL_786>",
793
+ "<SPECIAL_787>",
794
+ "<SPECIAL_788>",
795
+ "<SPECIAL_789>",
796
+ "<SPECIAL_790>",
797
+ "<SPECIAL_791>",
798
+ "<SPECIAL_792>",
799
+ "<SPECIAL_793>",
800
+ "<SPECIAL_794>",
801
+ "<SPECIAL_795>",
802
+ "<SPECIAL_796>",
803
+ "<SPECIAL_797>",
804
+ "<SPECIAL_798>",
805
+ "<SPECIAL_799>",
806
+ "<SPECIAL_800>",
807
+ "<SPECIAL_801>",
808
+ "<SPECIAL_802>",
809
+ "<SPECIAL_803>",
810
+ "<SPECIAL_804>",
811
+ "<SPECIAL_805>",
812
+ "<SPECIAL_806>",
813
+ "<SPECIAL_807>",
814
+ "<SPECIAL_808>",
815
+ "<SPECIAL_809>",
816
+ "<SPECIAL_810>",
817
+ "<SPECIAL_811>",
818
+ "<SPECIAL_812>",
819
+ "<SPECIAL_813>",
820
+ "<SPECIAL_814>",
821
+ "<SPECIAL_815>",
822
+ "<SPECIAL_816>",
823
+ "<SPECIAL_817>",
824
+ "<SPECIAL_818>",
825
+ "<SPECIAL_819>",
826
+ "<SPECIAL_820>",
827
+ "<SPECIAL_821>",
828
+ "<SPECIAL_822>",
829
+ "<SPECIAL_823>",
830
+ "<SPECIAL_824>",
831
+ "<SPECIAL_825>",
832
+ "<SPECIAL_826>",
833
+ "<SPECIAL_827>",
834
+ "<SPECIAL_828>",
835
+ "<SPECIAL_829>",
836
+ "<SPECIAL_830>",
837
+ "<SPECIAL_831>",
838
+ "<SPECIAL_832>",
839
+ "<SPECIAL_833>",
840
+ "<SPECIAL_834>",
841
+ "<SPECIAL_835>",
842
+ "<SPECIAL_836>",
843
+ "<SPECIAL_837>",
844
+ "<SPECIAL_838>",
845
+ "<SPECIAL_839>",
846
+ "<SPECIAL_840>",
847
+ "<SPECIAL_841>",
848
+ "<SPECIAL_842>",
849
+ "<SPECIAL_843>",
850
+ "<SPECIAL_844>",
851
+ "<SPECIAL_845>",
852
+ "<SPECIAL_846>",
853
+ "<SPECIAL_847>",
854
+ "<SPECIAL_848>",
855
+ "<SPECIAL_849>",
856
+ "<SPECIAL_850>",
857
+ "<SPECIAL_851>",
858
+ "<SPECIAL_852>",
859
+ "<SPECIAL_853>",
860
+ "<SPECIAL_854>",
861
+ "<SPECIAL_855>",
862
+ "<SPECIAL_856>",
863
+ "<SPECIAL_857>",
864
+ "<SPECIAL_858>",
865
+ "<SPECIAL_859>",
866
+ "<SPECIAL_860>",
867
+ "<SPECIAL_861>",
868
+ "<SPECIAL_862>",
869
+ "<SPECIAL_863>",
870
+ "<SPECIAL_864>",
871
+ "<SPECIAL_865>",
872
+ "<SPECIAL_866>",
873
+ "<SPECIAL_867>",
874
+ "<SPECIAL_868>",
875
+ "<SPECIAL_869>",
876
+ "<SPECIAL_870>",
877
+ "<SPECIAL_871>",
878
+ "<SPECIAL_872>",
879
+ "<SPECIAL_873>",
880
+ "<SPECIAL_874>",
881
+ "<SPECIAL_875>",
882
+ "<SPECIAL_876>",
883
+ "<SPECIAL_877>",
884
+ "<SPECIAL_878>",
885
+ "<SPECIAL_879>",
886
+ "<SPECIAL_880>",
887
+ "<SPECIAL_881>",
888
+ "<SPECIAL_882>",
889
+ "<SPECIAL_883>",
890
+ "<SPECIAL_884>",
891
+ "<SPECIAL_885>",
892
+ "<SPECIAL_886>",
893
+ "<SPECIAL_887>",
894
+ "<SPECIAL_888>",
895
+ "<SPECIAL_889>",
896
+ "<SPECIAL_890>",
897
+ "<SPECIAL_891>",
898
+ "<SPECIAL_892>",
899
+ "<SPECIAL_893>",
900
+ "<SPECIAL_894>",
901
+ "<SPECIAL_895>",
902
+ "<SPECIAL_896>",
903
+ "<SPECIAL_897>",
904
+ "<SPECIAL_898>",
905
+ "<SPECIAL_899>",
906
+ "<SPECIAL_900>",
907
+ "<SPECIAL_901>",
908
+ "<SPECIAL_902>",
909
+ "<SPECIAL_903>",
910
+ "<SPECIAL_904>",
911
+ "<SPECIAL_905>",
912
+ "<SPECIAL_906>",
913
+ "<SPECIAL_907>",
914
+ "<SPECIAL_908>",
915
+ "<SPECIAL_909>",
916
+ "<SPECIAL_910>",
917
+ "<SPECIAL_911>",
918
+ "<SPECIAL_912>",
919
+ "<SPECIAL_913>",
920
+ "<SPECIAL_914>",
921
+ "<SPECIAL_915>",
922
+ "<SPECIAL_916>",
923
+ "<SPECIAL_917>",
924
+ "<SPECIAL_918>",
925
+ "<SPECIAL_919>",
926
+ "<SPECIAL_920>",
927
+ "<SPECIAL_921>",
928
+ "<SPECIAL_922>",
929
+ "<SPECIAL_923>",
930
+ "<SPECIAL_924>",
931
+ "<SPECIAL_925>",
932
+ "<SPECIAL_926>",
933
+ "<SPECIAL_927>",
934
+ "<SPECIAL_928>",
935
+ "<SPECIAL_929>",
936
+ "<SPECIAL_930>",
937
+ "<SPECIAL_931>",
938
+ "<SPECIAL_932>",
939
+ "<SPECIAL_933>",
940
+ "<SPECIAL_934>",
941
+ "<SPECIAL_935>",
942
+ "<SPECIAL_936>",
943
+ "<SPECIAL_937>",
944
+ "<SPECIAL_938>",
945
+ "<SPECIAL_939>",
946
+ "<SPECIAL_940>",
947
+ "<SPECIAL_941>",
948
+ "<SPECIAL_942>",
949
+ "<SPECIAL_943>",
950
+ "<SPECIAL_944>",
951
+ "<SPECIAL_945>",
952
+ "<SPECIAL_946>",
953
+ "<SPECIAL_947>",
954
+ "<SPECIAL_948>",
955
+ "<SPECIAL_949>",
956
+ "<SPECIAL_950>",
957
+ "<SPECIAL_951>",
958
+ "<SPECIAL_952>",
959
+ "<SPECIAL_953>",
960
+ "<SPECIAL_954>",
961
+ "<SPECIAL_955>",
962
+ "<SPECIAL_956>",
963
+ "<SPECIAL_957>",
964
+ "<SPECIAL_958>",
965
+ "<SPECIAL_959>",
966
+ "<SPECIAL_960>",
967
+ "<SPECIAL_961>",
968
+ "<SPECIAL_962>",
969
+ "<SPECIAL_963>",
970
+ "<SPECIAL_964>",
971
+ "<SPECIAL_965>",
972
+ "<SPECIAL_966>",
973
+ "<SPECIAL_967>",
974
+ "<SPECIAL_968>",
975
+ "<SPECIAL_969>",
976
+ "<SPECIAL_970>",
977
+ "<SPECIAL_971>",
978
+ "<SPECIAL_972>",
979
+ "<SPECIAL_973>",
980
+ "<SPECIAL_974>",
981
+ "<SPECIAL_975>",
982
+ "<SPECIAL_976>",
983
+ "<SPECIAL_977>",
984
+ "<SPECIAL_978>",
985
+ "<SPECIAL_979>",
986
+ "<SPECIAL_980>",
987
+ "<SPECIAL_981>",
988
+ "<SPECIAL_982>",
989
+ "<SPECIAL_983>",
990
+ "<SPECIAL_984>",
991
+ "<SPECIAL_985>",
992
+ "<SPECIAL_986>",
993
+ "<SPECIAL_987>",
994
+ "<SPECIAL_988>",
995
+ "<SPECIAL_989>",
996
+ "<SPECIAL_990>",
997
+ "<SPECIAL_991>",
998
+ "<SPECIAL_992>",
999
+ "<SPECIAL_993>",
1000
+ "<SPECIAL_994>",
1001
+ "<SPECIAL_995>",
1002
+ "<SPECIAL_996>",
1003
+ "<SPECIAL_997>",
1004
+ "<SPECIAL_998>",
1005
+ "<SPECIAL_999>"
1006
+ ],
1007
+ "is_local": true,
1008
+ "model_max_length": 1000000000000000019884624838656,
1009
+ "pad_token": "<pad>",
1010
+ "processor_class": "PixtralProcessor",
1011
+ "tokenizer_class": "TokenizersBackend",
1012
+ "unk_token": "<unk>"
1013
+ }