| { | |
| "output_root": "/gpfs/projects/bsc88/corpus-utils-lm/23-12-2020-72f8c7e/output/model-ready_output/2020-12-23-1900-daf4-ab38", | |
| "files": "/gpfs/projects/bsc88/corpus-utils-lm/23-12-2020-72f8c7e/output/model-ready_output/2020-12-23-1900-daf4-ab38/train_valid_test_split_output/2020-12-23-1905-daf4-a0e0/train.txt", | |
| "vocab_name": "roberta-ca", | |
| "clean_text": true, | |
| "handle_chinese_chars": true, | |
| "strip_accents": false, | |
| "lowercase": false, | |
| "vocab_size": 52000, | |
| "limit_alphabet": 1000, | |
| "show_progress": true, | |
| "min_frequency": 2, | |
| "extra_tokens": [], | |
| "reserve_tokens": 0, | |
| "tokenizer": "bbpe", | |
| "commit_hash": "daf4d660ec8a4b28d2bc29b3063779100ab85796\n" | |
| } |