babylm-mop-10m-gpt2 / processor_config.json
NeTS-lab's picture
Upload 4 files
f1fb89a verified
raw
history blame
462 Bytes
{
"processor_class": "MorPieceProcessor",
"auto_map": {
"AutoProcessor": "morpiece_processor.MorPieceProcessor"
},
"tokenizer_class": "MorPieceTokenizer",
"feature_extractor_class": null,
"image_processor_class": null,
"audio_processor_class": null,
"morpiece_config": {
"vocab_size": 29066,
"min_frequency": 10,
"cutoff": 100,
"bf": 10,
"use_tokenizers_lib": true,
"processor_type": "text_only"
}
}