dataflare
/

df-arc

Model card Files Files and versions

df-arc / tokenizer_config.json

fr3on's picture

Upload folder using huggingface_hub

3b90e9e verified 3 months ago

446 Bytes

	{
	"auto_map": {
	"AutoTokenizer": [
	"tokenization_df_arc.DFArcTokenizer",
	null
	]
	},
	"tokenizer_class": "DFArcTokenizer",
	"phrases_file": "phrases.json",
	"normalization": {
	"unify_alef": true,
	"unify_yeh": true,
	"unify_teh_marbuta": true,
	"remove_diacritics": true,
	"remove_tatweel": true,
	"remove_repeats": true
	},
	"min_stem_length": 2,
	"vocab_size": 256000,
	"model_max_length": 4096
	}