File size: 211 Bytes
7e8e702
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
{
  "output_dir": "hindi_tokenizer_sota",
  "model_type": "unigram",
  "character_coverage": 0.9995,
  "max_examples": 300000,
  "sample_factor": 1.0,
  "create_hf_config": true,
  "debug": false,
  "seed": 42
}