File size: 2,949 Bytes
f41659a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
{
  "decode_with_special_keep": [
    "[CLS] hello, world! [SEP]",
    "[CLS] the quick brown fox jumps over the lazy dog. the quick brown fox jumps over the lazy dog. the quick brown fox jumps over the lazy dog. [SEP]",
    "[CLS] 日 本 語 のテスト [UNK] emoji [SEP]",
    "[CLS] indented text [SEP]",
    "[CLS] def foo ( x ) : return x + 1 [SEP]",
    "[CLS] < | begin _ of _ text | > hello < | end _ of _ text | > [SEP]",
    "[CLS] [CLS] sentence a [SEP] sentence b [SEP] [SEP]",
    "[CLS] [SEP]",
    "[CLS] a [SEP]",
    "[CLS] leading and trailing [SEP]",
    "[CLS] mixed 123 with numbers 4567 and symbols! @ # $ % ^ & * ( ) [SEP]",
    "[CLS] newline three [SEP]",
    "[CLS] tab tab tab [SEP]",
    "[CLS] quote \" double \" and ' single ' and ` backtick ` [SEP]",
    "[CLS] url : https : / / example. com / path? query = value & other = 1 [SEP]",
    "[CLS] email : alice @ example. com, bob @ foo. io [SEP]",
    "[CLS] 中 文 [UNK] [UNK] with english mixed [SEP]",
    "[CLS] repeating aaaaaaaaaaaa and bbbbbbbbbbbb [SEP]",
    "[CLS] emoji rain [UNK] and stars [UNK] [SEP]",
    "[CLS] code : ` int main ( ) { return 0 ; } ` [SEP]"
  ],
  "decode_with_special_skip": [
    "hello, world!",
    "the quick brown fox jumps over the lazy dog. the quick brown fox jumps over the lazy dog. the quick brown fox jumps over the lazy dog.",
    "日 本 語 のテスト emoji",
    "indented text",
    "def foo ( x ) : return x + 1",
    "< | begin _ of _ text | > hello < | end _ of _ text | >",
    "sentence a sentence b",
    "",
    "a",
    "leading and trailing",
    "mixed 123 with numbers 4567 and symbols! @ # $ % ^ & * ( )",
    "newline three",
    "tab tab tab",
    "quote \" double \" and ' single ' and ` backtick `",
    "url : https : / / example. com / path? query = value & other = 1",
    "email : alice @ example. com, bob @ foo. io",
    "中 文 with english mixed",
    "repeating aaaaaaaaaaaa and bbbbbbbbbbbb",
    "emoji rain and stars",
    "code : ` int main ( ) { return 0 ; } `"
  ],
  "decode_no_special": [
    "hello, world!",
    "the quick brown fox jumps over the lazy dog. the quick brown fox jumps over the lazy dog. the quick brown fox jumps over the lazy dog.",
    "日 本 語 のテスト [UNK] emoji",
    "indented text",
    "def foo ( x ) : return x + 1",
    "< | begin _ of _ text | > hello < | end _ of _ text | >",
    "[CLS] sentence a [SEP] sentence b [SEP]",
    "",
    "a",
    "leading and trailing",
    "mixed 123 with numbers 4567 and symbols! @ # $ % ^ & * ( )",
    "newline three",
    "tab tab tab",
    "quote \" double \" and ' single ' and ` backtick `",
    "url : https : / / example. com / path? query = value & other = 1",
    "email : alice @ example. com, bob @ foo. io",
    "中 文 [UNK] [UNK] with english mixed",
    "repeating aaaaaaaaaaaa and bbbbbbbbbbbb",
    "emoji rain [UNK] and stars [UNK]",
    "code : ` int main ( ) { return 0 ; } `"
  ]
}