mohitmayank committed on
Commit
4d98aea
·
verified ·
1 Parent(s): b546e86

Upload tokenizer

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<audio_start>",
4
+ "<audio_end>",
5
+ "<layer_sep>",
6
+ "<snac_pad>",
7
+ "<burps>",
8
+ "<sadly>",
9
+ "<smooches>",
10
+ "<chuckles>",
11
+ "<moans>",
12
+ "<whispers>",
13
+ "<stutters>",
14
+ "<sighing>",
15
+ "<Chuckles>",
16
+ "<천잰소리>",
17
+ "<gasps>",
18
+ "<sighs>",
19
+ "<laughs>",
20
+ "<scoffs>",
21
+ "<sonora>",
22
+ "<sigh>",
23
+ "<giggles>",
24
+ "<Chuckle>",
25
+ "<sniffs>",
26
+ "<chewing>",
27
+ "<singing>",
28
+ "<yawning>",
29
+ "<coughs>",
30
+ "<music>"
31
+ ],
32
+ "boi_token": "<start_of_image>",
33
+ "bos_token": {
34
+ "content": "<bos>",
35
+ "lstrip": false,
36
+ "normalized": false,
37
+ "rstrip": false,
38
+ "single_word": false
39
+ },
40
+ "eoi_token": "<end_of_image>",
41
+ "eos_token": {
42
+ "content": "<eos>",
43
+ "lstrip": false,
44
+ "normalized": false,
45
+ "rstrip": false,
46
+ "single_word": false
47
+ },
48
+ "image_token": "<image_soft_token>",
49
+ "pad_token": {
50
+ "content": "<pad>",
51
+ "lstrip": false,
52
+ "normalized": false,
53
+ "rstrip": false,
54
+ "single_word": false
55
+ },
56
+ "unk_token": {
57
+ "content": "<unk>",
58
+ "lstrip": false,
59
+ "normalized": false,
60
+ "rstrip": false,
61
+ "single_word": false
62
+ }
63
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80a5e717060527b6924bc0f9c733cd4037c60f5f6bf737b5d0f771d15657820a
3
+ size 35733453
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff