Ransaka committed on
Commit
ddf49e2
·
verified ·
1 Parent(s): 77e8116

Added tokenizer with large Sinhala corpus

Browse files
Files changed (1) hide show
  1. config.json +5 -5
config.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "unknown_token": "<|unk|>",
3
  "pad_token": "<|pad|>",
4
- "unknown_token_id": 1015,
5
- "pad_token_id": 1016,
6
- "max_length": 256,
7
- "end_of_text_token": "<|endoftext|>",
8
- "end_of_text_token_id": 1017
9
  }
 
1
  {
2
  "unknown_token": "<|unk|>",
3
  "pad_token": "<|pad|>",
4
+ "unknown_token_id": 576,
5
+ "pad_token_id": 577,
6
+ "max_length": 10,
7
+ "end_of_text_token": "<|end_of_text|>",
8
+ "end_of_text_token_id": 578
9
  }