gogpt-test / tokenizer_info.json
armansa1's picture
Upload tokenizer_info.json with huggingface_hub
9cd8893 verified
{
"vocab_size": 10303,
"pad_token_id": 0,
"mf_start_token_id": 1,
"mf_end_token_id": 2,
"bp_start_token_id": 3,
"bp_end_token_id": 4,
"cc_start_token_id": 5,
"cc_end_token_id": 6,
"organism_vocab_size": 201,
"embed_model_type": "esm2",
"max_protein_length": 1024,
"vocab_config": {
"MF": {
"method": "min_freq",
"value": 20
},
"BP": {
"method": "min_freq",
"value": 20
},
"CC": {
"method": "min_freq",
"value": 20
}
},
"vocab_stats": {
"MF": {
"method": "min_freq",
"threshold": 20,
"selected": 1537,
"freq_range": [
20,
73296
],
"total_terms": 7183
},
"BP": {
"method": "min_freq",
"threshold": 20,
"selected": 7720,
"freq_range": [
20,
81287
],
"total_terms": 21200
},
"CC": {
"method": "min_freq",
"threshold": 20,
"selected": 1039,
"freq_range": [
20,
84240
],
"total_terms": 2939
}
}
}