| { | |
| "tokenizer_path_name": null, | |
| "vocab_name": "bio-clinical", | |
| "tokenizer": "bbpe-roberta", | |
| "lowercase": false, | |
| "vocab_size": 50262, | |
| "min_frequency": 6, | |
| "extra_tokens": [], | |
| "limit_alphabet": 1000, | |
| "max_len": 512, | |
| "no_show_progress": false, | |
| "strip_accents": false, | |
| "no_handle_chinese_chars": false, | |
| "no_clean_text": false, | |
| "reserve_tokens": 0, | |
| "use_tokenizers": false, | |
| "no_fairseq": false, | |
| "bbpe_add_prefix_space": true, | |
| "single_paragraph_add_punct": true, | |
| "tok_batch_size": 100000000, | |
| "files": [ | |
| "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e/train_valid_test_split_output/bio-clinical-2021-12-07-1608-d1d3-fb2f/train.txt", | |
| "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e/train_valid_test_split_output/bio-clinical-2021-12-07-1608-d1d3-fb2f/valid.txt", | |
| "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e/train_valid_test_split_output/bio-clinical-2021-12-07-1608-d1d3-fb2f/test.txt" | |
| ], | |
| "output_root_path": "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e", | |
| "commit_hash": "d1d3920e7012caf14c9d6968fded36e0dd719a51" | |
| } |