nabeix committed on
Commit
d2dfd63
·
1 Parent(s): 3293572

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endofprompt|>": 100276,
3
+ "<|im_end|>": 100265,
4
+ "<|im_start|>": 100264
5
+ }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "saved_models/hf/impossibleexchange/0x117",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "/media/user/6864fd50-76ac-4ebd-b6ad-94f2710c7a71/sn37models/imbiuko22",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:753f9d808fc669049e94ab84530f2d3baaa7f0da782b6b75eb56d2d02ad25836
3
  size 4938143568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f7875fde121b64913384b9e110ce7b39c6514425ccfd4f3f8c75dd7a425f28e
3
  size 4938143568
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52736be4ca1f4686cf382a80e9a2f0c6161a9ad18f55261bbe0b165b34784e86
3
  size 4893374584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:135f61732f0e4ebc303dbbfdc076c657afa09f6d7efea62ebe369be9b68009bd
3
  size 4893374584
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09de985b36e3520c8ece4d34d9a8607e545b62d43eca3b00fef0cec8a65e3d08
3
  size 4416786288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3faa1a37329702a7daf6fd4386abd4a9506209158d90f53d88dc408bd871a2e6
3
  size 4416786288
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2902cdbb17c66976485b57af8c67210956e3592d71310c0da3dbaad6c1b5adb0
3
  size 1182007424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94ddff8823e3c39c92190bf23b1704db36f4ac9319051441e6a424a0512f0647
3
  size 1182007424
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "100257": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "100258": {
13
+ "content": "<|fim_prefix|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "100259": {
21
+ "content": "<|fim_middle|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "100260": {
29
+ "content": "<|fim_suffix|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "100264": {
37
+ "content": "<|im_start|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "100265": {
45
+ "content": "<|im_end|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "100276": {
53
+ "content": "<|endofprompt|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ }
60
+ },
61
+ "bos_token": "<|endoftext|>",
62
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
63
+ "clean_up_tokenization_spaces": false,
64
+ "eos_token": "<|endoftext|>",
65
+ "model_max_length": 8192,
66
+ "tokenizer_class": "GPT2Tokenizer",
67
+ "unk_token": "<|endoftext|>",
68
+ "use_safetensors": true
69
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff