| { | |
| "version": "1.0", | |
| "truncation": null, | |
| "padding": null, | |
| "added_tokens": [ | |
| { | |
| "id": 0, | |
| "content": "[UNK]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 159, | |
| "content": "[BOS]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 160, | |
| "content": "[EOS]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 161, | |
| "content": "[SEP]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 162, | |
| "content": "[PAD]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 163, | |
| "content": "[CLS]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 164, | |
| "content": "[MASK]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| } | |
| ], | |
| "normalizer": { | |
| "type": "Sequence", | |
| "normalizers": [ | |
| { | |
| "type": "Replace", | |
| "pattern": { | |
| "String": "++" | |
| }, | |
| "content": "+2" | |
| }, | |
| { | |
| "type": "Replace", | |
| "pattern": { | |
| "String": "--" | |
| }, | |
| "content": "-2" | |
| }, | |
| { | |
| "type": "Strip", | |
| "strip_left": true, | |
| "strip_right": true | |
| } | |
| ] | |
| }, | |
| "pre_tokenizer": { | |
| "outer": "Br?|Cl?|F|I|N|O|P|S|b|c|n|o|p|s|\\*|[\\.\\-=\\#\\$:/\\\\]|\\d|%|\\(|\\)|\\[.*?]", | |
| "inner": "(\\d+)?(A[c|g|l|m|r|s|t|u]|B[a|e|h|i|k|r]?|C[a|d|e|f|l|m|n|o|r|s|u]?|D[b|s|y]|E[r|s|u]|F[e|l|m|r]?|G[a|d|e]|H[e|f|g|o|s]?|I[n|r]?|Kr?|L[a|i|r|u|v]|M[c|d|g|n|o|t]|N[a|b|d|e|h|i|o|p]?|O[g|s]?|P[a|b|d|m|o|r|t|u]?|R[a|b|e|f|g|h|n|u]|S[b|c|e|g|i|m|n|r]?|T[a|b|c|e|h|i|l|m|s]|U|V|W|Xe|Yb?|Z[n|r]|as|b|c|n|o|p|se?|\\*)(?:(@(?:@|AL|OH|SP|T[B|H])?)(\\d{1,2})?)?(?:(H)(\\d)?)?(?:([+-]{1,2})(\\d{0,2}))?(?:(:)(\\d+))?" | |
| }, | |
| "post_processor": null, | |
| "decoder": { | |
| "type": "Fuse" | |
| }, | |
| "model": { | |
| "type": "WordLevel", | |
| "vocab": { | |
| "[UNK]": 0, | |
| "#": 1, | |
| "$": 2, | |
| "%": 3, | |
| "(": 4, | |
| ")": 5, | |
| "*": 6, | |
| "+": 7, | |
| "-": 8, | |
| ".": 9, | |
| "/": 10, | |
| "0": 11, | |
| "1": 12, | |
| "2": 13, | |
| "3": 14, | |
| "4": 15, | |
| "5": 16, | |
| "6": 17, | |
| "7": 18, | |
| "8": 19, | |
| "9": 20, | |
| ":": 21, | |
| "=": 22, | |
| "@": 23, | |
| "@@": 24, | |
| "@AL": 25, | |
| "@OH": 26, | |
| "@SP": 27, | |
| "@TB": 28, | |
| "@TH": 29, | |
| "Ac": 30, | |
| "Ag": 31, | |
| "Al": 32, | |
| "Am": 33, | |
| "Ar": 34, | |
| "As": 35, | |
| "At": 36, | |
| "Au": 37, | |
| "B": 38, | |
| "Ba": 39, | |
| "Be": 40, | |
| "Bh": 41, | |
| "Bi": 42, | |
| "Bk": 43, | |
| "Br": 44, | |
| "C": 45, | |
| "Ca": 46, | |
| "Cd": 47, | |
| "Ce": 48, | |
| "Cf": 49, | |
| "Cl": 50, | |
| "Cm": 51, | |
| "Cn": 52, | |
| "Co": 53, | |
| "Cr": 54, | |
| "Cs": 55, | |
| "Cu": 56, | |
| "Db": 57, | |
| "Ds": 58, | |
| "Dy": 59, | |
| "Er": 60, | |
| "Es": 61, | |
| "Eu": 62, | |
| "F": 63, | |
| "Fe": 64, | |
| "Fl": 65, | |
| "Fm": 66, | |
| "Fr": 67, | |
| "Ga": 68, | |
| "Gd": 69, | |
| "Ge": 70, | |
| "H": 71, | |
| "He": 72, | |
| "Hf": 73, | |
| "Hg": 74, | |
| "Ho": 75, | |
| "Hs": 76, | |
| "I": 77, | |
| "In": 78, | |
| "Ir": 79, | |
| "K": 80, | |
| "Kr": 81, | |
| "La": 82, | |
| "Li": 83, | |
| "Lr": 84, | |
| "Lu": 85, | |
| "Lv": 86, | |
| "Mc": 87, | |
| "Md": 88, | |
| "Mg": 89, | |
| "Mn": 90, | |
| "Mo": 91, | |
| "Mt": 92, | |
| "N": 93, | |
| "Na": 94, | |
| "Nb": 95, | |
| "Nd": 96, | |
| "Ne": 97, | |
| "Nh": 98, | |
| "Ni": 99, | |
| "No": 100, | |
| "Np": 101, | |
| "O": 102, | |
| "Og": 103, | |
| "Os": 104, | |
| "P": 105, | |
| "Pa": 106, | |
| "Pb": 107, | |
| "Pd": 108, | |
| "Pm": 109, | |
| "Po": 110, | |
| "Pr": 111, | |
| "Pt": 112, | |
| "Pu": 113, | |
| "Ra": 114, | |
| "Rb": 115, | |
| "Re": 116, | |
| "Rf": 117, | |
| "Rg": 118, | |
| "Rh": 119, | |
| "Rn": 120, | |
| "Ru": 121, | |
| "S": 122, | |
| "Sb": 123, | |
| "Sc": 124, | |
| "Se": 125, | |
| "Sg": 126, | |
| "Si": 127, | |
| "Sm": 128, | |
| "Sn": 129, | |
| "Sr": 130, | |
| "Ta": 131, | |
| "Tb": 132, | |
| "Tc": 133, | |
| "Te": 134, | |
| "Th": 135, | |
| "Ti": 136, | |
| "Tl": 137, | |
| "Tm": 138, | |
| "Ts": 139, | |
| "U": 140, | |
| "V": 141, | |
| "W": 142, | |
| "Xe": 143, | |
| "Y": 144, | |
| "Yb": 145, | |
| "Zn": 146, | |
| "Zr": 147, | |
| "[": 148, | |
| "\\": 149, | |
| "]": 150, | |
| "as": 151, | |
| "b": 152, | |
| "c": 153, | |
| "n": 154, | |
| "o": 155, | |
| "p": 156, | |
| "s": 157, | |
| "se": 158 | |
| }, | |
| "unk_token": "[UNK]" | |
| } | |
| } |