TAILHS-Webapp-Project / tokenizer.json
theoneandonlyboogel's picture
Upload 6 files
74d5e1c verified
raw
history blame contribute delete
965 Bytes
{
"tokenizers": [
{
"name": "AA",
"tokenizer_id": 0,
"file": "t5_tokenizer_AA_special.json",
"start_delimiter": "<start_AA>",
"end_delimiter": "<end_AA>"
},
{
"name": "SMILES",
"tokenizer_id": 1,
"file": "bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json",
"start_delimiter": "<start_SMILES>",
"end_delimiter": "<end_SMILES>"
},
{
"name": "CELL_ATTRIBUTES",
"tokenizer_id": 2,
"file": "cell_attributes_tokenizer.json",
"start_delimiter": "<start_CELL_ATTRIBUTES>",
"end_delimiter": "<end_CELL_ATTRIBUTES>"
},
{
"name": "GENE",
"tokenizer_id": 3,
"file": "gene_tokenizer.json",
"start_delimiter": "<start_GENE>",
"end_delimiter": "<end_GENE>",
"minimal_token_id": 5000
}
],
"max_possible_token_id": 100000,
"max_special_token_id": 500
}