{
"name": "SAM Tokenizer",
"architecture": "NexusBPE",
"organization": "AMFORGE",
"version": "1.0",
"vocab_size": 12000,
"pad_id": 0,
"bos_id": 1,
"eos_id": 2,
"unk_id": 3,
"domains": [
"ROS",
"HTTP",
"MQTT",
"DB",
"WORKFLOW",
"ECOMMERCE",
"VEHICLE",
"HOME",
"CAL",
"FILE"
],
"structural_markers": [
"<SCHEMA>",
"</SCHEMA>",
"<TASK>",
"</TASK>",
"<JSON>",
"</JSON>",
"<ACTION>",
"</ACTION>",
"<META>",
"</META>"
],
"guarantees": [
"atomic numerics in supported ranges",
"atomic domain markers and structural tags",
"deterministic encoding",
"bit-perfect roundtrip on structured payloads"
],
"license": "apache-2.0",
"homepage": "https://huggingface.co/AMFORGE/sam_tokenizer"
}