{
  "name": "SAM Tokenizer",
  "architecture": "NexusBPE",
  "organization": "AMFORGE",
  "version": "1.0",
  "vocab_size": 12000,
  "pad_id": 0,
  "bos_id": 1,
  "eos_id": 2,
  "unk_id": 3,
  "domains": [
    "ROS",
    "HTTP",
    "MQTT",
    "DB",
    "WORKFLOW",
    "ECOMMERCE",
    "VEHICLE",
    "HOME",
    "CAL",
    "FILE"
  ],
  "structural_markers": [
    "<SCHEMA>",
    "</SCHEMA>",
    "<TASK>",
    "</TASK>",
    "<JSON>",
    "</JSON>",
    "<ACTION>",
    "</ACTION>",
    "<META>",
    "</META>"
  ],
  "guarantees": [
    "atomic numerics in supported ranges",
    "atomic domain markers and structural tags",
    "deterministic encoding",
    "bit-perfect roundtrip on structured payloads"
  ],
  "license": "apache-2.0",
  "homepage": "https://huggingface.co/AMFORGE/sam_tokenizer"
}