{
  "tokenizer_path_name": null,
  "vocab_name": "biomedical",
  "tokenizer": "bbpe-roberta",
  "lowercase": false,
  "vocab_size": 50262,
  "min_frequency": 6,
  "extra_tokens": [],
  "limit_alphabet": 1000,
  "max_len": 512,
  "no_show_progress": false,
  "strip_accents": false,
  "no_handle_chinese_chars": false,
  "no_clean_text": false,
  "reserve_tokens": 0,
  "use_tokenizers": false,
  "no_fairseq": false,
  "bbpe_add_prefix_space": true,
  "single_paragraph_add_punct": true,
  "tok_batch_size": 100000000,
  "files": [
    "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/biomedical-vocab-50262-2021-12-09-1207-d1d3-e42b/train_valid_test_split_output/biomedical-2021-12-09-1210-d1d3-ad85/train.txt",
    "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/biomedical-vocab-50262-2021-12-09-1207-d1d3-e42b/train_valid_test_split_output/biomedical-2021-12-09-1210-d1d3-ad85/valid.txt",
    "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/biomedical-vocab-50262-2021-12-09-1207-d1d3-e42b/train_valid_test_split_output/biomedical-2021-12-09-1210-d1d3-ad85/test.txt"
  ],
  "output_root_path": "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/biomedical-vocab-50262-2021-12-09-1207-d1d3-e42b",
  "commit_hash": "d1d3920e7012caf14c9d6968fded36e0dd719a51"
}