| { |
| "decode_with_special_keep": [ |
| "[CLS] hello, world! [SEP]", |
| "[CLS] the quick brown fox jumps over the lazy dog. the quick brown fox jumps over the lazy dog. the quick brown fox jumps over the lazy dog. [SEP]", |
| "[CLS] 日 本 語 のテスト [UNK] emoji [SEP]", |
| "[CLS] indented text [SEP]", |
| "[CLS] def foo ( x ) : return x + 1 [SEP]", |
| "[CLS] < | begin _ of _ text | > hello < | end _ of _ text | > [SEP]", |
| "[CLS] [CLS] sentence a [SEP] sentence b [SEP] [SEP]", |
| "[CLS] [SEP]", |
| "[CLS] a [SEP]", |
| "[CLS] leading and trailing [SEP]", |
| "[CLS] mixed 123 with numbers 4567 and symbols! @ # $ % ^ & * ( ) [SEP]", |
| "[CLS] newline three [SEP]", |
| "[CLS] tab tab tab [SEP]", |
| "[CLS] quote \" double \" and ' single ' and ` backtick ` [SEP]", |
| "[CLS] url : https : / / example. com / path? query = value & other = 1 [SEP]", |
| "[CLS] email : alice @ example. com, bob @ foo. io [SEP]", |
| "[CLS] 中 文 [UNK] [UNK] with english mixed [SEP]", |
| "[CLS] repeating aaaaaaaaaaaa and bbbbbbbbbbbb [SEP]", |
| "[CLS] emoji rain [UNK] and stars [UNK] [SEP]", |
| "[CLS] code : ` int main ( ) { return 0 ; } ` [SEP]" |
| ], |
| "decode_with_special_skip": [ |
| "hello, world!", |
| "the quick brown fox jumps over the lazy dog. the quick brown fox jumps over the lazy dog. the quick brown fox jumps over the lazy dog.", |
| "日 本 語 のテスト emoji", |
| "indented text", |
| "def foo ( x ) : return x + 1", |
| "< | begin _ of _ text | > hello < | end _ of _ text | >", |
| "sentence a sentence b", |
| "", |
| "a", |
| "leading and trailing", |
| "mixed 123 with numbers 4567 and symbols! @ # $ % ^ & * ( )", |
| "newline three", |
| "tab tab tab", |
| "quote \" double \" and ' single ' and ` backtick `", |
| "url : https : / / example. com / path? query = value & other = 1", |
| "email : alice @ example. com, bob @ foo. io", |
| "中 文 with english mixed", |
| "repeating aaaaaaaaaaaa and bbbbbbbbbbbb", |
| "emoji rain and stars", |
| "code : ` int main ( ) { return 0 ; } `" |
| ], |
| "decode_no_special": [ |
| "hello, world!", |
| "the quick brown fox jumps over the lazy dog. the quick brown fox jumps over the lazy dog. the quick brown fox jumps over the lazy dog.", |
| "日 本 語 のテスト [UNK] emoji", |
| "indented text", |
| "def foo ( x ) : return x + 1", |
| "< | begin _ of _ text | > hello < | end _ of _ text | >", |
| "[CLS] sentence a [SEP] sentence b [SEP]", |
| "", |
| "a", |
| "leading and trailing", |
| "mixed 123 with numbers 4567 and symbols! @ # $ % ^ & * ( )", |
| "newline three", |
| "tab tab tab", |
| "quote \" double \" and ' single ' and ` backtick `", |
| "url : https : / / example. com / path? query = value & other = 1", |
| "email : alice @ example. com, bob @ foo. io", |
| "中 文 [UNK] [UNK] with english mixed", |
| "repeating aaaaaaaaaaaa and bbbbbbbbbbbb", |
| "emoji rain [UNK] and stars [UNK]", |
| "code : ` int main ( ) { return 0 ; } `" |
| ] |
| } |