{
  "output_dir": "hindi_tokenizer_sota",
  "model_type": "unigram",
  "character_coverage": 0.9995,
  "max_examples": 300000,
  "sample_factor": 1.0,
  "create_hf_config": true,
  "debug": false,
  "seed": 42
}
{
  "output_dir": "hindi_tokenizer_sota",
  "model_type": "unigram",
  "character_coverage": 0.9995,
  "max_examples": 300000,
  "sample_factor": 1.0,
  "create_hf_config": true,
  "debug": false,
  "seed": 42
}