suchirsalhan committed on
Commit
3a62a5c
·
verified ·
1 Parent(s): dbfd128

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. tokenizer.json +26 -1
  2. tokenizer_config.json +1 -0
tokenizer.json CHANGED
@@ -47,7 +47,32 @@
47
  "prepend_scheme": "first",
48
  "split": false
49
  },
50
- "post_processor": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  "decoder": {
52
  "type": "Sequence",
53
  "decoders": [
 
47
  "prepend_scheme": "first",
48
  "split": false
49
  },
50
+ "post_processor": {
51
+ "type": "TemplateProcessing",
52
+ "single": [
53
+ {
54
+ "Sequence": {
55
+ "id": "A",
56
+ "type_id": 0
57
+ }
58
+ }
59
+ ],
60
+ "pair": [
61
+ {
62
+ "Sequence": {
63
+ "id": "A",
64
+ "type_id": 0
65
+ }
66
+ },
67
+ {
68
+ "Sequence": {
69
+ "id": "B",
70
+ "type_id": 1
71
+ }
72
+ }
73
+ ],
74
+ "special_tokens": {}
75
+ },
76
  "decoder": {
77
  "type": "Sequence",
78
  "decoders": [
tokenizer_config.json CHANGED
@@ -4,6 +4,7 @@
4
  "bos_token": "<s>",
5
  "clean_up_tokenization_spaces": false,
6
  "eos_token": "</s>",
 
7
  "model_max_length": 1000000000000000019884624838656,
8
  "pad_token": "<pad>",
9
  "tokenizer_class": "LlamaTokenizer",
 
4
  "bos_token": "<s>",
5
  "clean_up_tokenization_spaces": false,
6
  "eos_token": "</s>",
7
+ "model_file": "tmp_factory/tokenizer_tr/spm.model",
8
  "model_max_length": 1000000000000000019884624838656,
9
  "pad_token": "<pad>",
10
  "tokenizer_class": "LlamaTokenizer",