File size: 2,309 Bytes

b46df25

{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 0,
      "content": "<unk>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "<cls>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "<sep>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 3,
      "content": "<pad>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 4,
      "content": "<mask>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 5,
      "content": "<s>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 6,
      "content": "</s>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "Sequence",
    "normalizers": [
      {
        "type": "Replace",
        "pattern": {
          "String": "``"
        },
        "content": "\""
      },
      {
        "type": "Replace",
        "pattern": {
          "String": "''"
        },
        "content": "\""
      },
      {
        "type": "NFKD"
      },
      {
        "type": "StripAccents"
      },
      {
        "type": "Replace",
        "pattern": {
          "Regex": " {2,}"
        },
        "content": " "
      }
    ]
  },
  "pre_tokenizer": {
    "type": "Metaspace",
    "replacement": "▁",
    "add_prefix_space": true,
    "prepend_scheme": "always"
  },
  "post_processor": null,
  "decoder": {
    "type": "Metaspace",
    "replacement": "▁",
    "add_prefix_space": true,
    "prepend_scheme": "always"
  },
  "model": {
    "type": "Unigram",
    "unk_id": 0,
    "vocab": [
      [
        "<unk>",
        0.0
      ]
    ],
    "byte_fallback": false
  }
}