sam_tokenizer / tokenizer_config.json
ameforge's picture
Upload tokenizer_config.json with huggingface_hub
ae968c7 verified
{
"name": "SAM Tokenizer",
"architecture": "NexusBPE",
"organization": "AMFORGE",
"version": "1.0",
"vocab_size": 12000,
"pad_id": 0,
"bos_id": 1,
"eos_id": 2,
"unk_id": 3,
"domains": [
"ROS",
"HTTP",
"MQTT",
"DB",
"WORKFLOW",
"ECOMMERCE",
"VEHICLE",
"HOME",
"CAL",
"FILE"
],
"structural_markers": [
"<SCHEMA>",
"</SCHEMA>",
"<TASK>",
"</TASK>",
"<JSON>",
"</JSON>",
"<ACTION>",
"</ACTION>",
"<META>",
"</META>"
],
"guarantees": [
"atomic numerics in supported ranges",
"atomic domain markers and structural tags",
"deterministic encoding",
"bit-perfect roundtrip on structured payloads"
],
"license": "apache-2.0",
"homepage": "https://huggingface.co/AMFORGE/sam_tokenizer"
}