dvitvaai
/

pothana-base-300M

Text Generation

text-generation-inference

Model card Files Files and versions

pothana-base-300M / tokenizer_config.json

neshkatrapati's picture

Upload folder using huggingface_hub

715004d verified 25 days ago

history blame contribute delete

771 Bytes

	{
	  "tokenizer_class": "PreTrainedTokenizerFast",
	  "auto_map": {
	    "AutoTokenizer": [
	      null,
	      "tokenizer_class.TeluguTokenizer"
	    ]
	  },
	  "model_type": "llama",
	  "bos_token": "<bos>",
	  "eos_token": "<eos>",
	  "unk_token": "<unk>",
	  "pad_token": "<pad>",
	  "add_bos_token": true,
	  "add_eos_token": false,
	  "clean_up_tokenization_spaces": false,
	  "model_max_length": 2048,
	  "extra_info": {
	    "type": "morfessor_bpe_telugu",
	    "separator": "@@",
	    "note": "This tokenizer expects Morfessor-segmented text as input. For raw Telugu text, run Morfessor segmentation first using the included morfessor_telugu.bin model. Tokens ending with '@@' are continuation pieces that join to the next token. The decoder handles @@ removal automatically."
	  }
	}