chandraliuswanto committed on
Commit
9d32825
·
verified ·
1 Parent(s): 8b7885b

Training in progress, epoch 0

Browse files
Files changed (2) hide show
  1. preprocessor_config.json +18 -0
  2. tokenizer.json +4 -4
preprocessor_config.json CHANGED
@@ -1,4 +1,22 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "do_align_long_axis": false,
3
  "do_normalize": true,
4
  "do_pad": true,
 
1
  {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_thumbnail",
8
+ "do_align_long_axis",
9
+ "do_pad",
10
+ "random_padding",
11
+ "do_rescale",
12
+ "rescale_factor",
13
+ "do_normalize",
14
+ "image_mean",
15
+ "image_std",
16
+ "return_tensors",
17
+ "data_format",
18
+ "input_data_format"
19
+ ],
20
  "do_align_long_axis": false,
21
  "do_normalize": true,
22
  "do_pad": true,
tokenizer.json CHANGED
@@ -303,8 +303,8 @@
303
  "pre_tokenizer": {
304
  "type": "Metaspace",
305
  "replacement": "▁",
306
- "add_prefix_space": true,
307
- "prepend_scheme": "always"
308
  },
309
  "post_processor": {
310
  "type": "TemplateProcessing",
@@ -390,8 +390,8 @@
390
  "decoder": {
391
  "type": "Metaspace",
392
  "replacement": "▁",
393
- "add_prefix_space": true,
394
- "prepend_scheme": "always"
395
  },
396
  "model": {
397
  "type": "Unigram",
 
303
  "pre_tokenizer": {
304
  "type": "Metaspace",
305
  "replacement": "▁",
306
+ "prepend_scheme": "always",
307
+ "split": true
308
  },
309
  "post_processor": {
310
  "type": "TemplateProcessing",
 
390
  "decoder": {
391
  "type": "Metaspace",
392
  "replacement": "▁",
393
+ "prepend_scheme": "always",
394
+ "split": true
395
  },
396
  "model": {
397
  "type": "Unigram",