File size: 2,020 Bytes
6a227f1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 | {
"padding": true,
"return_lengths": false,
"return_word_embeddings": false,
"return_casing": false,
"return_features": false,
"return_chars": false,
"return_bert_embeddings": true,
"vocab_char": {
"<PAD>": 0,
"<UNK>": 1,
"$": 2,
"%": 3,
"+": 4,
",": 5,
"-": 6,
".": 7,
"/": 8,
"0": 9,
"1": 10,
"2": 11,
"3": 12,
"4": 13,
"5": 14,
"6": 15,
"7": 16,
"8": 17,
"9": 18,
":": 19,
"E": 20,
"F": 21,
"H": 22,
"J": 23,
"N": 24,
"O": 25,
"P": 26,
"S": 27,
"T": 28,
"a": 29,
"b": 30,
"d": 31,
"e": 32,
"f": 33,
"g": 34,
"h": 35,
"i": 36,
"l": 37,
"m": 38,
"n": 39,
"o": 40,
"p": 41,
"r": 42,
"s": 43,
"t": 44,
"u": 45,
"v": 46,
"w": 47,
"x": 48,
"y": 49,
"z": 50,
"{": 51,
"~": 52,
"\u00d7": 53,
"\u2022": 54,
"\u20ac": 55,
"\u223c": 56,
"\uf03c": 57
},
"vocab_tag": {
"<PAD>": 0,
"B-<alpha>": 1,
"B-<base>": 2,
"B-<number>": 3,
"B-<pow>": 4,
"I-<alpha>": 5,
"I-<base>": 6,
"I-<number>": 7,
"I-<pow>": 8,
"O": 9
},
"vocab_case": [
"<PAD>",
"numeric",
"allLower",
"allUpper",
"initialUpper",
"other",
"mainly_numeric",
"contains_digit"
],
"max_char_length": 30,
"feature_preprocessor": null,
"indice_tag": {
"0": "<PAD>",
"1": "B-<alpha>",
"2": "B-<base>",
"3": "B-<number>",
"4": "B-<pow>",
"5": "I-<alpha>",
"6": "I-<base>",
"7": "I-<number>",
"8": "I-<pow>",
"9": "O"
}
} |