C10X committed on
Commit
a0b00a9
·
verified ·
1 Parent(s): 7577475

Upload 6 files

Browse files
chat_template.jinja ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {% for message in messages %}<|im_start|>{{ message['role'] }}
2
+ {% if message['role'] == 'assistant' %}{% generation %}{{ message['content'] }}<|im_end|>
3
+ {% endgeneration %}{% else %}{{ message['content'] }}<|im_end|>
4
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
5
+ {% endif %}
config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 2,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 512,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2048,
15
+ "layer_types": [
16
+ "sliding_attention",
17
+ "full_attention",
18
+ "sliding_attention",
19
+ "full_attention",
20
+ "sliding_attention",
21
+ "full_attention"
22
+ ],
23
+ "max_position_embeddings": 8192,
24
+ "max_window_layers": 5,
25
+ "model_type": "qwen3",
26
+ "num_attention_heads": 8,
27
+ "num_hidden_layers": 6,
28
+ "num_key_value_heads": 2,
29
+ "pad_token_id": 2,
30
+ "rms_norm_eps": 1e-06,
31
+ "rope_parameters": {
32
+ "rope_theta": 500000,
33
+ "rope_type": "default"
34
+ },
35
+ "sliding_window": 512,
36
+ "tie_word_embeddings": true,
37
+ "transformers_version": "5.8.0.dev0",
38
+ "use_cache": false,
39
+ "use_sliding_window": true,
40
+ "vocab_size": 16384
41
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": [
5
+ 2
6
+ ],
7
+ "pad_token_id": 2,
8
+ "transformers_version": "5.8.0.dev0",
9
+ "use_cache": false
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a3db5166cd9ac9295aac6654169149c685c2381a09a3d352a7b83b9502da536
3
+ size 62412552
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|startoftext|>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<|return|>",
6
+ "falcon_latex_tokens": [
7
+ "\\blindtext",
8
+ "\\newpage",
9
+ "\\boxed",
10
+ "\\framebox",
11
+ "\\fbox",
12
+ "\\tag",
13
+ "\\nonumber",
14
+ "\\item",
15
+ "\\centering",
16
+ "\\caption",
17
+ "\\includegraphics",
18
+ "\\label",
19
+ "\\multicolumn",
20
+ "\\cline",
21
+ "\\hline",
22
+ "\\end",
23
+ "\\begin",
24
+ "\\tableofcontents",
25
+ "\\maketitle",
26
+ "\\date",
27
+ "\\author",
28
+ "\\title",
29
+ "\\chapter",
30
+ "\\subsubsection",
31
+ "\\subsection",
32
+ "\\section",
33
+ "\\noindent",
34
+ "\\newline",
35
+ "\\par",
36
+ "\\ddot",
37
+ "\\dot",
38
+ "\\bar",
39
+ "\\iff",
40
+ "\\implies",
41
+ "\\neg",
42
+ "\\lor",
43
+ "\\land",
44
+ "\\qquad",
45
+ "\\quad",
46
+ "\\arctan",
47
+ "\\arccos",
48
+ "\\arcsin",
49
+ "\\cot",
50
+ "\\sec",
51
+ "\\csc",
52
+ "\\tan",
53
+ "\\cos",
54
+ "\\sin",
55
+ "\\widetilde",
56
+ "\\widehat",
57
+ "\\complement",
58
+ "\\varnothing",
59
+ "\\tilde",
60
+ "\\hat",
61
+ "\\vec",
62
+ "\\mathfrak",
63
+ "\\mathcal",
64
+ "\\mathbb",
65
+ "\\emptyset",
66
+ "\\bot",
67
+ "\\aleph",
68
+ "\\Re",
69
+ "\\Im",
70
+ "\\ell",
71
+ "\\hbar",
72
+ "\\exists",
73
+ "\\forall",
74
+ "\\partial",
75
+ "\\nabla",
76
+ "\\infty",
77
+ "\\binom",
78
+ "\\overline",
79
+ "\\sqrt",
80
+ "\\tfrac",
81
+ "\\dfrac",
82
+ "\\frac",
83
+ "\\textsc",
84
+ "\\textsf",
85
+ "\\texttt",
86
+ "\\emph",
87
+ "\\underline",
88
+ "\\textit",
89
+ "\\textbf",
90
+ "\\bigoplus",
91
+ "\\bigcap",
92
+ "\\bigcup",
93
+ "\\prod",
94
+ "\\sum",
95
+ "\\oint",
96
+ "\\iiint",
97
+ "\\iint",
98
+ "\\int",
99
+ "\\right",
100
+ "\\left",
101
+ "\\Bigr",
102
+ "\\Bigl",
103
+ "\\bigr",
104
+ "\\bigl",
105
+ "\\rceil",
106
+ "\\lceil",
107
+ "\\rfloor",
108
+ "\\lfloor",
109
+ "\\rangle",
110
+ "\\langle",
111
+ "\\overleftarrow",
112
+ "\\overleftrightarrow",
113
+ "\\overrightarrow",
114
+ "\\Longleftrightarrow",
115
+ "\\longleftrightarrow",
116
+ "\\longrightarrow",
117
+ "\\longleftarrow",
118
+ "\\Longrightarrow",
119
+ "\\Longleftarrow",
120
+ "\\gets",
121
+ "\\to",
122
+ "\\mapsto",
123
+ "\\Updownarrow",
124
+ "\\Downarrow",
125
+ "\\Uparrow",
126
+ "\\updownarrow",
127
+ "\\downarrow",
128
+ "\\uparrow",
129
+ "\\Leftrightarrow",
130
+ "\\Rightarrow",
131
+ "\\Leftarrow",
132
+ "\\leftrightarrow",
133
+ "\\rightarrow",
134
+ "\\leftarrow",
135
+ "\\perp",
136
+ "\\propto",
137
+ "\\ni",
138
+ "\\notin",
139
+ "\\in",
140
+ "\\supseteq",
141
+ "\\supset",
142
+ "\\sqsupseteq",
143
+ "\\sqsubseteq",
144
+ "\\subseteq",
145
+ "\\subset",
146
+ "\\cong",
147
+ "\\approx",
148
+ "\\simeq",
149
+ "\\sim",
150
+ "\\equiv",
151
+ "\\neq",
152
+ "\\geq",
153
+ "\\leq",
154
+ "\\oslash",
155
+ "\\otimes",
156
+ "\\ominus",
157
+ "\\oplus",
158
+ "\\wedge",
159
+ "\\vee",
160
+ "\\sqcup",
161
+ "\\sqcap",
162
+ "\\uplus",
163
+ "\\cup",
164
+ "\\cap",
165
+ "\\cdot",
166
+ "\\bullet",
167
+ "\\circ",
168
+ "\\star",
169
+ "\\ast",
170
+ "\\div",
171
+ "\\times",
172
+ "\\mp",
173
+ "\\pm",
174
+ "\\omega",
175
+ "\\psi",
176
+ "\\chi",
177
+ "\\varphi",
178
+ "\\varsigma",
179
+ "\\phi",
180
+ "\\upsilon",
181
+ "\\tau",
182
+ "\\sigma",
183
+ "\\rho",
184
+ "\\pi",
185
+ "\\xi",
186
+ "\\nu",
187
+ "\\mu",
188
+ "\\lambda",
189
+ "\\kappa",
190
+ "\\iota",
191
+ "\\theta",
192
+ "\\eta",
193
+ "\\zeta",
194
+ "\\epsilon",
195
+ "\\delta",
196
+ "\\gamma",
197
+ "\\beta",
198
+ "\\alpha"
199
+ ],
200
+ "is_local": true,
201
+ "latex_token_source": "tiiuae/Falcon-H1-Tiny-90M-Instruct",
202
+ "local_files_only": false,
203
+ "model_input_names": [
204
+ "input_ids",
205
+ "attention_mask"
206
+ ],
207
+ "model_max_length": 1000000000000000019884624838656,
208
+ "pad_token": "<|return|>",
209
+ "seed_tokenizer": "openai/gpt-oss-20b",
210
+ "tokenizer_class": "TokenizersBackend"
211
+ }