nabeix committed on
Commit
7ed7b74
·
1 Parent(s): 39fbff2

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endofprompt|>": 100276,
3
+ "<|im_end|>": 100265,
4
+ "<|im_start|>": 100264
5
+ }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "saved_models/hf/biustnaspus/puszek20",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "merged_models",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:263421adac0724dac4f6754f64d05446d8bba64ea03feb3841c0ab7b8a72750c
3
  size 4938143568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d2c342cd775281c14be110a0896b6d74bcf85c8dce165195ccf7c5719872066
3
  size 4938143568
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f601a7e014f7088f516591fc23275aae294e3093f1b63a90bbf0d078b947a6e
3
  size 4893374584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e7fe6b94088998d7c8a008e9dd353a52af5c72d2e4d8c38048121766bb6d31f
3
  size 4893374584
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10ab888387bbbb7b6a3979b22d427ccd41fe0e3193d6e9a65cbbc423ae0190d0
3
  size 4416786288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca878f9329b54dd2309ff01b5743de82dca105c8106214f1e8513e6a567affaf
3
  size 4416786288
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e145736957627c18b457e25ce34892acc68a88459c6f372dcfdeae0a18f90462
3
  size 1182007424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f48daf219c372914a947da2335d8a22b966b419fb9ca80f2538ed797c128a75
3
  size 1182007424
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "100257": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "100258": {
13
+ "content": "<|fim_prefix|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "100259": {
21
+ "content": "<|fim_middle|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "100260": {
29
+ "content": "<|fim_suffix|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "100264": {
37
+ "content": "<|im_start|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "100265": {
45
+ "content": "<|im_end|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "100276": {
53
+ "content": "<|endofprompt|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ }
60
+ },
61
+ "bos_token": "<|endoftext|>",
62
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
63
+ "clean_up_tokenization_spaces": false,
64
+ "eos_token": "<|endoftext|>",
65
+ "model_max_length": 8192,
66
+ "tokenizer_class": "GPT2Tokenizer",
67
+ "unk_token": "<|endoftext|>"
68
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff