Levelfive committed on
Commit
ddd7c97
·
verified ·
1 Parent(s): 0cb8e9a

Upload LIGHTBRAIN model

Browse files
.gitattributes CHANGED
@@ -1,35 +1,7 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
  *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ # Auto-generated by LIGHTBRAIN exporter
2
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.pt filter=lfs diff=lfs merge=lfs -text
5
  *.ckpt filter=lfs diff=lfs merge=lfs -text
 
 
6
  *.h5 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
7
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Model-001.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62af9944faec015cb65e579004de3205e3a9ea7fc06bd953956dddbc197c3db4
3
+ size 756333712
README.md ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ language:
4
+ - en
5
+ tags:
6
+ - lightbrain
7
+ - field-dynamics
8
+ - sparse-activation
9
+ - text-generation
10
+ library_name: lightbrain
11
+ pipeline_tag: text-generation
12
+ model-index:
13
+ - name: lightbrain-100m
14
+ results: []
15
+ ---
16
+
17
+ # lightbrain-100m
18
+
19
+ ## Model Description
20
+
21
+ LIGHTBRAIN is a novel neural architecture based on the **Hybrid Field Transformer** paradigm.
22
+
23
+ ### Key Features
24
+
25
+ - **Sparse Activation**: Only ~0.1-10% of field regions active during inference
26
+ - **Field Dynamics**: Pattern resonance for knowledge retrieval
27
+ - **Transformer Integration**: Self-attention for sequence modeling (hybrid)
28
+ - **OpenAI-Compatible API**: Drop-in replacement for chat completions
29
+
30
+ ## Architecture
31
+
32
+ | Component | Value |
33
+ |-----------|-------|
34
+ | Hidden Size | 768 |
35
+ | Layers | 12 |
36
+ | Attention Heads | 12 |
37
+ | Field Regions | 128 |
38
+ | Field Size | 128 |
39
+ | Field Depth | 64 |
40
+
41
+ ```
42
+ ┌─────────────────────────────────────┐
43
+ │ TRANSFORMER ENCODER LAYERS │
44
+ │ (Self-Attention + FFN) │
45
+ └─────────────────────────────────────┘
46
+
47
+ ┌─────────────────────────────────────┐
48
+ │ FIELD DYNAMICS CORE │
49
+ │ (Sparse Activation + Evolution) │
50
+ └─────────────────────────────────────┘
51
+
52
+ ┌─────────────────────────────────────┐
53
+ │ OUTPUT PROJECTION │
54
+ │ (Pattern → Token Logits) │
55
+ └─────────────────────────────────────┘
56
+ ```
57
+
58
+ ## Model Files
59
+
60
+ | File | Description |
61
+ |------|-------------|
62
+ | `Model-001.safetensors` | Model weights (721.30 MB) |
63
+ | `config.json` | Model configuration |
64
+ | `tokenizer.json` | Tokenizer vocabulary |
65
+ | `tokenizer_config.json` | Tokenizer configuration |
66
+ | `generation_config.json` | Generation parameters |
67
+ | `params.json` | LIGHTBRAIN parameters |
68
+
69
+ ## Model Stats
70
+
71
+ - **Original Size**: 721.28 MB
72
+ - **File Size**: 721.30 MB
73
+ - **Compression Ratio**: 1.00x
74
+ - **Number of Tensors**: 200
75
+
76
+ ## Usage
77
+
78
+ ### With LIGHTBRAIN Library
79
+
80
+ ```python
81
+ from lightbrain.model import HybridFieldTransformer
82
+ from lightbrain.inference import InferenceEngine
83
+
84
+ # Load model
85
+ model = HybridFieldTransformer.load("path/to/model")
86
+ engine = InferenceEngine(model=model)
87
+
88
+ # Generate
89
+ result = engine.generate("Hello, how are you?")
90
+ print(result.text)
91
+ ```
92
+
93
+ ### Loading from Safetensors
94
+
95
+ ```python
96
+ from safetensors.numpy import load_file
97
+ import json
98
+
99
+ # Load weights
100
+ weights = load_file("Model-001.safetensors")
101
+
102
+ # Load config
103
+ with open("config.json") as f:
104
+ config = json.load(f)
105
+
106
+ # Reconstruct model from weights
107
+ ```
108
+
109
+ ### In Google Colab
110
+
111
+ ```python
112
+ # Install
113
+ !pip install safetensors
114
+
115
+ # Download
116
+ from huggingface_hub import snapshot_download
117
+ model_path = snapshot_download(repo_id="lightbrain-100m")
118
+
119
+ # Load and use
120
+ from safetensors.numpy import load_file
121
+ weights = load_file(f"{model_path}/Model-001.safetensors")
122
+ ```
123
+
124
+ ## Training
125
+
126
+ Trained using LIGHTBRAIN framework with:
127
+ - Resonance Alignment (Hebbian learning)
128
+ - Gradient-based fine-tuning for transformer layers
129
+ - Field topology optimization
130
+
131
+ ## License
132
+
133
+ MIT License
134
+
135
+ ## Citation
136
+
137
+ ```bibtex
138
+ @misc{lightbrain2024,
139
+ title={LIGHTBRAIN: Hybrid Field Dynamics for Efficient LLMs},
140
+ year={2024},
141
+ publisher={HuggingFace}
142
+ }
143
+ ```
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "lightbrain-hybrid",
3
+ "architectures": [
4
+ "LightbrainHybridForCausalLM"
5
+ ],
6
+ "vocab_size": 256,
7
+ "max_position_embeddings": 2048,
8
+ "hidden_size": 768,
9
+ "num_hidden_layers": 12,
10
+ "num_attention_heads": 12,
11
+ "intermediate_size": 3072,
12
+ "num_regions": 128,
13
+ "field_size": 128,
14
+ "field_depth": 64,
15
+ "coupling_strength": 0.15,
16
+ "damping": 0.02,
17
+ "dt": 0.02,
18
+ "activation_threshold": 0.05,
19
+ "max_active_ratio": 0.1,
20
+ "max_evolution_steps": 20,
21
+ "convergence_threshold": 0.01,
22
+ "min_coherence": 0.2,
23
+ "torch_dtype": "float32",
24
+ "bos_token_id": 2,
25
+ "eos_token_id": 3,
26
+ "pad_token_id": 0
27
+ }
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 2,
4
+ "eos_token_id": 3,
5
+ "pad_token_id": 0,
6
+ "do_sample": true,
7
+ "temperature": 0.8,
8
+ "top_p": 0.9,
9
+ "top_k": 50,
10
+ "max_length": 2048,
11
+ "max_new_tokens": 512,
12
+ "repetition_penalty": 1.1,
13
+ "transformers_version": "4.36.0"
14
+ }
params.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "lightbrain-hybrid",
3
+ "architecture": "hybrid-field-transformer",
4
+ "version": "1.0.0",
5
+ "vocab_size": 256,
6
+ "compression": {
7
+ "format": "safetensors",
8
+ "algorithm": "zstd",
9
+ "level": 19
10
+ },
11
+ "stats": {
12
+ "num_tensors": 200,
13
+ "total_size_bytes": 756312064,
14
+ "file_size_bytes": 756333712,
15
+ "compression_ratio": 0.9999713777137571
16
+ },
17
+ "hidden_size": 768,
18
+ "num_hidden_layers": 12,
19
+ "num_attention_heads": 12,
20
+ "intermediate_size": 3072,
21
+ "num_regions": 128,
22
+ "field_size": 128,
23
+ "field_depth": 64,
24
+ "coupling_strength": 0.15,
25
+ "damping": 0.02,
26
+ "activation_threshold": 0.05,
27
+ "max_active_ratio": 0.1,
28
+ "max_evolution_steps": 20
29
+ }
tokenizer.json ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<pad>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<unk>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "<bos>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "<eos>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ }
42
+ ],
43
+ "normalizer": {
44
+ "type": "Sequence",
45
+ "normalizers": []
46
+ },
47
+ "pre_tokenizer": {
48
+ "type": "ByteLevel",
49
+ "add_prefix_space": false,
50
+ "trim_offsets": true,
51
+ "use_regex": true
52
+ },
53
+ "post_processor": {
54
+ "type": "TemplateProcessing",
55
+ "single": [
56
+ {
57
+ "SpecialToken": {
58
+ "id": "<bos>",
59
+ "type_id": 0
60
+ }
61
+ },
62
+ {
63
+ "Sequence": {
64
+ "id": "A",
65
+ "type_id": 0
66
+ }
67
+ }
68
+ ],
69
+ "pair": [
70
+ {
71
+ "SpecialToken": {
72
+ "id": "<bos>",
73
+ "type_id": 0
74
+ }
75
+ },
76
+ {
77
+ "Sequence": {
78
+ "id": "A",
79
+ "type_id": 0
80
+ }
81
+ },
82
+ {
83
+ "SpecialToken": {
84
+ "id": "<eos>",
85
+ "type_id": 0
86
+ }
87
+ },
88
+ {
89
+ "Sequence": {
90
+ "id": "B",
91
+ "type_id": 1
92
+ }
93
+ }
94
+ ],
95
+ "special_tokens": {
96
+ "<bos>": {
97
+ "id": "<bos>",
98
+ "ids": [
99
+ 2
100
+ ],
101
+ "tokens": [
102
+ "<bos>"
103
+ ]
104
+ },
105
+ "<eos>": {
106
+ "id": "<eos>",
107
+ "ids": [
108
+ 3
109
+ ],
110
+ "tokens": [
111
+ "<eos>"
112
+ ]
113
+ }
114
+ }
115
+ },
116
+ "decoder": {
117
+ "type": "ByteLevel",
118
+ "add_prefix_space": false,
119
+ "trim_offsets": true,
120
+ "use_regex": true
121
+ },
122
+ "model": {
123
+ "type": "BPE",
124
+ "dropout": null,
125
+ "unk_token": "<unk>",
126
+ "continuing_subword_prefix": null,
127
+ "end_of_word_suffix": null,
128
+ "fuse_unk": false,
129
+ "byte_fallback": true,
130
+ "vocab": {
131
+ "<0x00>": 0,
132
+ "<0x01>": 1,
133
+ "<0x02>": 2,
134
+ "<0x03>": 3,
135
+ "<0x04>": 4,
136
+ "<0x05>": 5,
137
+ "<0x06>": 6,
138
+ "<0x07>": 7,
139
+ "<0x08>": 8,
140
+ "<0x09>": 9,
141
+ "<0x0A>": 10,
142
+ "<0x0B>": 11,
143
+ "<0x0C>": 12,
144
+ "<0x0D>": 13,
145
+ "<0x0E>": 14,
146
+ "<0x0F>": 15,
147
+ "<0x10>": 16,
148
+ "<0x11>": 17,
149
+ "<0x12>": 18,
150
+ "<0x13>": 19,
151
+ "<0x14>": 20,
152
+ "<0x15>": 21,
153
+ "<0x16>": 22,
154
+ "<0x17>": 23,
155
+ "<0x18>": 24,
156
+ "<0x19>": 25,
157
+ "<0x1A>": 26,
158
+ "<0x1B>": 27,
159
+ "<0x1C>": 28,
160
+ "<0x1D>": 29,
161
+ "<0x1E>": 30,
162
+ "<0x1F>": 31,
163
+ " ": 32,
164
+ "!": 33,
165
+ "\"": 34,
166
+ "#": 35,
167
+ "$": 36,
168
+ "%": 37,
169
+ "&": 38,
170
+ "'": 39,
171
+ "(": 40,
172
+ ")": 41,
173
+ "*": 42,
174
+ "+": 43,
175
+ ",": 44,
176
+ "-": 45,
177
+ ".": 46,
178
+ "/": 47,
179
+ "0": 48,
180
+ "1": 49,
181
+ "2": 50,
182
+ "3": 51,
183
+ "4": 52,
184
+ "5": 53,
185
+ "6": 54,
186
+ "7": 55,
187
+ "8": 56,
188
+ "9": 57,
189
+ ":": 58,
190
+ ";": 59,
191
+ "<": 60,
192
+ "=": 61,
193
+ ">": 62,
194
+ "?": 63,
195
+ "@": 64,
196
+ "A": 65,
197
+ "B": 66,
198
+ "C": 67,
199
+ "D": 68,
200
+ "E": 69,
201
+ "F": 70,
202
+ "G": 71,
203
+ "H": 72,
204
+ "I": 73,
205
+ "J": 74,
206
+ "K": 75,
207
+ "L": 76,
208
+ "M": 77,
209
+ "N": 78,
210
+ "O": 79,
211
+ "P": 80,
212
+ "Q": 81,
213
+ "R": 82,
214
+ "S": 83,
215
+ "T": 84,
216
+ "U": 85,
217
+ "V": 86,
218
+ "W": 87,
219
+ "X": 88,
220
+ "Y": 89,
221
+ "Z": 90,
222
+ "[": 91,
223
+ "\\": 92,
224
+ "]": 93,
225
+ "^": 94,
226
+ "_": 95,
227
+ "`": 96,
228
+ "a": 97,
229
+ "b": 98,
230
+ "c": 99,
231
+ "d": 100,
232
+ "e": 101,
233
+ "f": 102,
234
+ "g": 103,
235
+ "h": 104,
236
+ "i": 105,
237
+ "j": 106,
238
+ "k": 107,
239
+ "l": 108,
240
+ "m": 109,
241
+ "n": 110,
242
+ "o": 111,
243
+ "p": 112,
244
+ "q": 113,
245
+ "r": 114,
246
+ "s": 115,
247
+ "t": 116,
248
+ "u": 117,
249
+ "v": 118,
250
+ "w": 119,
251
+ "x": 120,
252
+ "y": 121,
253
+ "z": 122,
254
+ "{": 123,
255
+ "|": 124,
256
+ "}": 125,
257
+ "~": 126,
258
+ "<0x7F>": 127,
259
+ "<0x80>": 128,
260
+ "<0x81>": 129,
261
+ "<0x82>": 130,
262
+ "<0x83>": 131,
263
+ "<0x84>": 132,
264
+ "<0x85>": 133,
265
+ "<0x86>": 134,
266
+ "<0x87>": 135,
267
+ "<0x88>": 136,
268
+ "<0x89>": 137,
269
+ "<0x8A>": 138,
270
+ "<0x8B>": 139,
271
+ "<0x8C>": 140,
272
+ "<0x8D>": 141,
273
+ "<0x8E>": 142,
274
+ "<0x8F>": 143,
275
+ "<0x90>": 144,
276
+ "<0x91>": 145,
277
+ "<0x92>": 146,
278
+ "<0x93>": 147,
279
+ "<0x94>": 148,
280
+ "<0x95>": 149,
281
+ "<0x96>": 150,
282
+ "<0x97>": 151,
283
+ "<0x98>": 152,
284
+ "<0x99>": 153,
285
+ "<0x9A>": 154,
286
+ "<0x9B>": 155,
287
+ "<0x9C>": 156,
288
+ "<0x9D>": 157,
289
+ "<0x9E>": 158,
290
+ "<0x9F>": 159,
291
+ "<0xA0>": 160,
292
+ "<0xA1>": 161,
293
+ "<0xA2>": 162,
294
+ "<0xA3>": 163,
295
+ "<0xA4>": 164,
296
+ "<0xA5>": 165,
297
+ "<0xA6>": 166,
298
+ "<0xA7>": 167,
299
+ "<0xA8>": 168,
300
+ "<0xA9>": 169,
301
+ "<0xAA>": 170,
302
+ "<0xAB>": 171,
303
+ "<0xAC>": 172,
304
+ "<0xAD>": 173,
305
+ "<0xAE>": 174,
306
+ "<0xAF>": 175,
307
+ "<0xB0>": 176,
308
+ "<0xB1>": 177,
309
+ "<0xB2>": 178,
310
+ "<0xB3>": 179,
311
+ "<0xB4>": 180,
312
+ "<0xB5>": 181,
313
+ "<0xB6>": 182,
314
+ "<0xB7>": 183,
315
+ "<0xB8>": 184,
316
+ "<0xB9>": 185,
317
+ "<0xBA>": 186,
318
+ "<0xBB>": 187,
319
+ "<0xBC>": 188,
320
+ "<0xBD>": 189,
321
+ "<0xBE>": 190,
322
+ "<0xBF>": 191,
323
+ "<0xC0>": 192,
324
+ "<0xC1>": 193,
325
+ "<0xC2>": 194,
326
+ "<0xC3>": 195,
327
+ "<0xC4>": 196,
328
+ "<0xC5>": 197,
329
+ "<0xC6>": 198,
330
+ "<0xC7>": 199,
331
+ "<0xC8>": 200,
332
+ "<0xC9>": 201,
333
+ "<0xCA>": 202,
334
+ "<0xCB>": 203,
335
+ "<0xCC>": 204,
336
+ "<0xCD>": 205,
337
+ "<0xCE>": 206,
338
+ "<0xCF>": 207,
339
+ "<0xD0>": 208,
340
+ "<0xD1>": 209,
341
+ "<0xD2>": 210,
342
+ "<0xD3>": 211,
343
+ "<0xD4>": 212,
344
+ "<0xD5>": 213,
345
+ "<0xD6>": 214,
346
+ "<0xD7>": 215,
347
+ "<0xD8>": 216,
348
+ "<0xD9>": 217,
349
+ "<0xDA>": 218,
350
+ "<0xDB>": 219,
351
+ "<0xDC>": 220,
352
+ "<0xDD>": 221,
353
+ "<0xDE>": 222,
354
+ "<0xDF>": 223,
355
+ "<0xE0>": 224,
356
+ "<0xE1>": 225,
357
+ "<0xE2>": 226,
358
+ "<0xE3>": 227,
359
+ "<0xE4>": 228,
360
+ "<0xE5>": 229,
361
+ "<0xE6>": 230,
362
+ "<0xE7>": 231,
363
+ "<0xE8>": 232,
364
+ "<0xE9>": 233,
365
+ "<0xEA>": 234,
366
+ "<0xEB>": 235,
367
+ "<0xEC>": 236,
368
+ "<0xED>": 237,
369
+ "<0xEE>": 238,
370
+ "<0xEF>": 239,
371
+ "<0xF0>": 240,
372
+ "<0xF1>": 241,
373
+ "<0xF2>": 242,
374
+ "<0xF3>": 243,
375
+ "<0xF4>": 244,
376
+ "<0xF5>": 245,
377
+ "<0xF6>": 246,
378
+ "<0xF7>": 247,
379
+ "<0xF8>": 248,
380
+ "<0xF9>": 249,
381
+ "<0xFA>": 250,
382
+ "<0xFB>": 251,
383
+ "<0xFC>": 252,
384
+ "<0xFD>": 253,
385
+ "<0xFE>": 254,
386
+ "<0xFF>": 255,
387
+ "<pad>": 0,
388
+ "<unk>": 1,
389
+ "<bos>": 2,
390
+ "<eos>": 3,
391
+ "<sep>": 4
392
+ },
393
+ "merges": []
394
+ }
395
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<pad>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<unk>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "<bos>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "3": {
30
+ "content": "<eos>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ }
37
+ },
38
+ "bos_token": "<bos>",
39
+ "clean_up_tokenization_spaces": false,
40
+ "eos_token": "<eos>",
41
+ "legacy": true,
42
+ "model_max_length": 2048,
43
+ "pad_token": "<pad>",
44
+ "spaces_between_special_tokens": false,
45
+ "tokenizer_class": "LightbrainTokenizer",
46
+ "unk_token": "<unk>",
47
+ "use_default_system_prompt": false
48
+ }