File size: 806 Bytes
a5c80e8
ae968c7
 
 
 
a5c80e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae968c7
a5c80e8
 
 
 
 
 
 
 
 
 
 
ae968c7
 
 
 
 
 
a5c80e8
ae968c7
a5c80e8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
{
  "name": "SAM Tokenizer",
  "architecture": "NexusBPE",
  "organization": "AMFORGE",
  "version": "1.0",
  "vocab_size": 12000,
  "pad_id": 0,
  "bos_id": 1,
  "eos_id": 2,
  "unk_id": 3,
  "domains": [
    "ROS",
    "HTTP",
    "MQTT",
    "DB",
    "WORKFLOW",
    "ECOMMERCE",
    "VEHICLE",
    "HOME",
    "CAL",
    "FILE"
  ],
  "structural_markers": [
    "<SCHEMA>",
    "</SCHEMA>",
    "<TASK>",
    "</TASK>",
    "<JSON>",
    "</JSON>",
    "<ACTION>",
    "</ACTION>",
    "<META>",
    "</META>"
  ],
  "guarantees": [
    "atomic numerics in supported ranges",
    "atomic domain markers and structural tags",
    "deterministic encoding",
    "bit-perfect roundtrip on structured payloads"
  ],
  "license": "apache-2.0",
  "homepage": "https://huggingface.co/AMFORGE/sam_tokenizer"
}