iDSLR committed on
Commit
fb3b4fc
·
verified ·
1 Parent(s): cda729a

새롭게 업데이트된 토크나이저

Browse files
added_tokens.json CHANGED
@@ -1,10 +1,14 @@
1
  {
2
  "$~bos$": 50257,
3
- "$~dev$": 50262,
4
  "$~eos$": 50256,
5
- "$~me$": 50261,
 
 
 
6
  "$~pad$": 50258,
7
- "$~somebody$": 50263,
8
- "$~the_flow_of_thought$": 50260,
 
9
  "$~unk$": 50259
10
  }
 
1
  {
2
  "$~bos$": 50257,
3
+ "$~dev$": 50260,
4
  "$~eos$": 50256,
5
+ "$~func-continue$": 50263,
6
+ "$~func-time$": 50262,
7
+ "$~info$": 50264,
8
+ "$~me$": 50265,
9
  "$~pad$": 50258,
10
+ "$~somebody$": 50266,
11
+ "$~tfot$": 50261,
12
+ "$~time$": 50267,
13
  "$~unk$": 50259
14
  }
special_tokens_map.json CHANGED
@@ -1,4 +1,62 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "$~bos$",
4
  "lstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "$~dev$",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "$~tfot$",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "$~func-time$",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "$~func-continue$",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "$~info$",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "$~me$",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "$~somebody$",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "$~time$",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ }
59
+ ],
60
  "bos_token": {
61
  "content": "$~bos$",
62
  "lstrip": false,
tokenizer.json CHANGED
@@ -41,8 +41,8 @@
41
  },
42
  {
43
  "id": 50260,
44
- "content": "$~the_flow_of_thought$",
45
- "single_word": true,
46
  "lstrip": false,
47
  "rstrip": false,
48
  "normalized": false,
@@ -50,8 +50,8 @@
50
  },
51
  {
52
  "id": 50261,
53
- "content": "$~me$",
54
- "single_word": true,
55
  "lstrip": false,
56
  "rstrip": false,
57
  "normalized": false,
@@ -59,8 +59,8 @@
59
  },
60
  {
61
  "id": 50262,
62
- "content": "$~dev$",
63
- "single_word": true,
64
  "lstrip": false,
65
  "rstrip": false,
66
  "normalized": false,
@@ -68,8 +68,44 @@
68
  },
69
  {
70
  "id": 50263,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  "content": "$~somebody$",
72
- "single_word": true,
 
 
 
 
 
 
 
 
 
73
  "lstrip": false,
74
  "rstrip": false,
75
  "normalized": false,
 
41
  },
42
  {
43
  "id": 50260,
44
+ "content": "$~dev$",
45
+ "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
  "normalized": false,
 
50
  },
51
  {
52
  "id": 50261,
53
+ "content": "$~tfot$",
54
+ "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
  "normalized": false,
 
59
  },
60
  {
61
  "id": 50262,
62
+ "content": "$~func-time$",
63
+ "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
66
  "normalized": false,
 
68
  },
69
  {
70
  "id": 50263,
71
+ "content": "$~func-continue$",
72
+ "single_word": false,
73
+ "lstrip": false,
74
+ "rstrip": false,
75
+ "normalized": false,
76
+ "special": true
77
+ },
78
+ {
79
+ "id": 50264,
80
+ "content": "$~info$",
81
+ "single_word": false,
82
+ "lstrip": false,
83
+ "rstrip": false,
84
+ "normalized": false,
85
+ "special": true
86
+ },
87
+ {
88
+ "id": 50265,
89
+ "content": "$~me$",
90
+ "single_word": false,
91
+ "lstrip": false,
92
+ "rstrip": false,
93
+ "normalized": false,
94
+ "special": true
95
+ },
96
+ {
97
+ "id": 50266,
98
  "content": "$~somebody$",
99
+ "single_word": false,
100
+ "lstrip": false,
101
+ "rstrip": false,
102
+ "normalized": false,
103
+ "special": true
104
+ },
105
+ {
106
+ "id": 50267,
107
+ "content": "$~time$",
108
+ "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
111
  "normalized": false,
tokenizer_config.json CHANGED
@@ -35,38 +35,80 @@
35
  "special": true
36
  },
37
  "50260": {
38
- "content": "$~the_flow_of_thought$",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
42
- "single_word": true,
43
  "special": true
44
  },
45
  "50261": {
46
- "content": "$~me$",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
50
- "single_word": true,
51
  "special": true
52
  },
53
  "50262": {
54
- "content": "$~dev$",
55
  "lstrip": false,
56
  "normalized": false,
57
  "rstrip": false,
58
- "single_word": true,
59
  "special": true
60
  },
61
  "50263": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  "content": "$~somebody$",
63
  "lstrip": false,
64
  "normalized": false,
65
  "rstrip": false,
66
- "single_word": true,
 
 
 
 
 
 
 
 
67
  "special": true
68
  }
69
  },
 
 
 
 
 
 
 
 
 
 
70
  "bos_token": "$~bos$",
71
  "clean_up_tokenization_spaces": true,
72
  "eos_token": "$~eos$",
 
35
  "special": true
36
  },
37
  "50260": {
38
+ "content": "$~dev$",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
42
+ "single_word": false,
43
  "special": true
44
  },
45
  "50261": {
46
+ "content": "$~tfot$",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
50
+ "single_word": false,
51
  "special": true
52
  },
53
  "50262": {
54
+ "content": "$~func-time$",
55
  "lstrip": false,
56
  "normalized": false,
57
  "rstrip": false,
58
+ "single_word": false,
59
  "special": true
60
  },
61
  "50263": {
62
+ "content": "$~func-continue$",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "50264": {
70
+ "content": "$~info$",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "50265": {
78
+ "content": "$~me$",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "50266": {
86
  "content": "$~somebody$",
87
  "lstrip": false,
88
  "normalized": false,
89
  "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "50267": {
94
+ "content": "$~time$",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
  "special": true
100
  }
101
  },
102
+ "additional_special_tokens": [
103
+ "$~dev$",
104
+ "$~tfot$",
105
+ "$~func-time$",
106
+ "$~func-continue$",
107
+ "$~info$",
108
+ "$~me$",
109
+ "$~somebody$",
110
+ "$~time$"
111
+ ],
112
  "bos_token": "$~bos$",
113
  "clean_up_tokenization_spaces": true,
114
  "eos_token": "$~eos$",