Upload tokenizer
Browse files- special_tokens_map.json +8 -0
- tokenization_decodon.py +321 -0
- tokenizer_config.json +59 -0
- vocab.json +1 -0
special_tokens_map.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<CLS>",
|
| 3 |
+
"cls_token": "<CLS>",
|
| 4 |
+
"mask_token": "<MASK>",
|
| 5 |
+
"pad_token": "<PAD>",
|
| 6 |
+
"sep_token": "<SEP>",
|
| 7 |
+
"unk_token": "<UNK>"
|
| 8 |
+
}
|
tokenization_decodon.py
ADDED
|
@@ -0,0 +1,321 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import re
|
| 4 |
+
from transformers import PreTrainedTokenizer
|
| 5 |
+
from itertools import product
|
| 6 |
+
|
| 7 |
+
class DeCodonTokenizer(PreTrainedTokenizer):
|
| 8 |
+
"""
|
| 9 |
+
DeCodonTokenizer Tokenizer: tokenize 3-mer codons into tokens
|
| 10 |
+
The input sequences are expected to be raw sequences of coding DNA/RNA sequences.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
SUPPORTED_TYPES = ["dna", "rna"]
|
| 14 |
+
|
| 15 |
+
@staticmethod
def get_all_codons(seq_type="dna"):
    """
    Return every 3-letter codon over the nucleotide alphabet of `seq_type`.

    Args:
        seq_type (`str`): "dna" (alphabet ACGT) or "rna" (alphabet ACGU). Matched case-insensitively.

    Returns:
        `List[str]`: All 64 codons, in `itertools.product` order over the alphabet.

    Raises:
        AssertionError: If `seq_type` is not listed in `DeCodonTokenizer.SUPPORTED_TYPES`.
    """
    seq_type = seq_type.lower()
    # Raised explicitly rather than via `assert`: assert statements are removed under
    # `python -O`, which would let an unsupported type fall through to the RNA alphabet.
    # AssertionError is kept so existing callers see the same exception type and message.
    if seq_type not in DeCodonTokenizer.SUPPORTED_TYPES:
        raise AssertionError(
            f"seq_type should be either 'dna' or 'rna'. Got {seq_type}!"
        )

    alphabet = "ACGT" if seq_type == "dna" else "ACGU"
    return ["".join(codon) for codon in product(alphabet, repeat=3)]
|
| 29 |
+
|
| 30 |
+
def __init__(
    self,
    vocab_file=None,
    cls_token="<CLS>",
    bos_token="<CLS>",
    sep_token="<SEP>",
    unk_token="<UNK>",
    pad_token="<PAD>",
    mask_token="<MASK>",
    seq_type="dna",
    **kwargs,
):
    """
    Build the vocabulary, the tokenisation regexes and the codon/amino-acid tables.

    Args:
        vocab_file (`str`, *optional*): Path to a JSON file mapping token -> id. When omitted, the
            vocabulary is the five special tokens (CLS, SEP, UNK, PAD, MASK) followed by the 64
            codons of `seq_type`.
        cls_token, bos_token, sep_token, unk_token, pad_token, mask_token (`str`): Special tokens,
            forwarded to `PreTrainedTokenizer.__init__`. `bos_token` is not part of
            `self.special_tokens`.
        seq_type (`str`): "dna" or "rna" (case-insensitive); selects the codon alphabet.
        **kwargs: Forwarded to `PreTrainedTokenizer.__init__`. `token_type_mode` ("regular",
            "regular_special" or "aa"; defaults to "regular") is also read from here.
    """
    # Normalise once so the codon list, the stored attribute and the RNA branch below
    # all agree (previously "RNA" produced RNA codons but DNA amino-acid tables).
    seq_type = seq_type.lower()
    self.codons = self.get_all_codons(seq_type=seq_type)
    self.seq_type = seq_type
    self.special_tokens = [
        str(token)
        for token in (cls_token, sep_token, unk_token, pad_token, mask_token)
    ]

    if vocab_file is not None:
        with open(vocab_file, "r", encoding="utf-8") as f:
            self.encoder = json.load(f)
    else:
        self.encoder = {
            k: i for i, k in enumerate(self.special_tokens + self.codons)
        }
    self.decoder = {i: k for k, i in self.encoder.items()}

    # A loaded vocabulary may hold tokens beyond codons and special tokens (for example
    # tokens previously registered through `set_organism_tokens`). They must be regex
    # alternatives, otherwise the `\S` fallback splits them into single characters.
    known_tokens = set(self.codons) | set(self.special_tokens)
    extra_tokens = sorted(
        (token for token in self.encoder if token not in known_tokens),
        key=len,
        reverse=True,  # longest first so a shorter token cannot shadow a longer one
    )
    # Tokens are escaped so regex metacharacters in a token are matched literally.
    self.compiled_regex = re.compile(
        "|".join(
            [
                re.escape(token)
                for token in self.codons + self.special_tokens + extra_tokens
            ]
            + [r"\S"]
        )
    )

    super().__init__(
        cls_token=cls_token,
        bos_token=bos_token,
        sep_token=sep_token,
        unk_token=unk_token,
        pad_token=pad_token,
        mask_token=mask_token,
        **kwargs,
    )

    # Amino acid -> codons table written with the DNA alphabet; "*" holds TAA, TAG, TGA.
    aa_to_codon = {
        "A": ["GCT", "GCC", "GCA", "GCG"],
        "C": ["TGT", "TGC"],
        "D": ["GAT", "GAC"],
        "E": ["GAA", "GAG"],
        "F": ["TTT", "TTC"],
        "G": ["GGT", "GGC", "GGA", "GGG"],
        "H": ["CAT", "CAC"],
        "I": ["ATT", "ATC", "ATA"],
        "K": ["AAA", "AAG"],
        "L": ["TTA", "TTG", "CTT", "CTC", "CTA", "CTG"],
        "M": ["ATG"],
        "N": ["AAT", "AAC"],
        "P": ["CCT", "CCC", "CCA", "CCG"],
        "Q": ["CAA", "CAG"],
        "R": ["CGT", "CGC", "CGA", "CGG", "AGA", "AGG"],
        "S": ["TCT", "TCC", "TCA", "TCG", "AGT", "AGC"],
        "T": ["ACT", "ACC", "ACA", "ACG"],
        "V": ["GTT", "GTC", "GTA", "GTG"],
        "W": ["TGG"],
        "Y": ["TAT", "TAC"],
        "*": ["TAA", "TAG", "TGA"],
    }
    if seq_type == "rna":
        # Same table expressed in the RNA alphabet (T -> U).
        aa_to_codon = {
            aa: [codon.replace("T", "U") for codon in codons]
            for aa, codons in aa_to_codon.items()
        }
    self.aa_to_codon = aa_to_codon
    self.codon_to_aa = {
        codon: aa for aa, codons in aa_to_codon.items() for codon in codons
    }

    self.amino_acids = list("ACDEFGHIKLMNPQRSTVWY")
    self.encoder_aa = {
        k: i for i, k in enumerate(self.special_tokens + self.amino_acids)
    }
    self.compiled_regex_aa = re.compile(
        "|".join(
            [re.escape(token) for token in self.amino_acids + self.special_tokens]
            + [r"\S"]
        )
    )

    self.token_type_mode = kwargs.get("token_type_mode", "regular")
    self.build_token_type_encoder()
|
| 115 |
+
|
| 116 |
+
def set_organism_tokens(self, organism_tokens):
    """
    Register organism tokens in the vocabulary and in the tokenisation regex.

    Tokens already present in the vocabulary keep their existing id, so calling this on a
    tokenizer whose vocabulary already contains them (or calling it twice) does not
    renumber anything. New tokens are appended with consecutive ids starting at the
    current vocabulary size.

    Args:
        organism_tokens (iterable of `str`): Tokens to register.
    """
    organism_tokens = list(organism_tokens)

    next_id = len(self.encoder)
    for token in organism_tokens:
        if token in self.encoder:
            # Overwriting would leave a stale decoder entry and create ids >= vocab_size.
            continue
        self.encoder[token] = next_id
        self.decoder[next_id] = token
        next_id += 1

    self.organism_tokens = organism_tokens
    # Escape each token so it is matched literally; `\S` stays last as the catch-all.
    self.compiled_regex = re.compile(
        "|".join(
            [
                re.escape(token)
                for token in self.codons + self.special_tokens + organism_tokens
            ]
            + [r"\S"]
        )
    )
|
| 129 |
+
|
| 130 |
+
@property
def vocab_size(self):
    """
    Number of entries currently held in the `encoder` mapping.
    """
    vocabulary = self.encoder
    return len(vocabulary)
|
| 133 |
+
|
| 134 |
+
def build_token_type_encoder(self):
    """
    Populate `self.token_type_encoder` (token id -> token type id) for `self.token_type_mode`.

    Modes:
        "regular": every token id maps to type 0.
        "regular_special": special tokens map to 0, every other token to 1.
        "aa": special tokens map to 0; a codon maps to 1 + the position of its amino acid in
            `self.amino_acids + ["*"]`; any other token maps to `len(self.amino_acids) + 2`.

    Raises:
        ValueError: If `self.token_type_mode` is none of the modes above.
    """
    mode = self.token_type_mode

    if mode == "regular":
        type_by_id = dict.fromkeys(self.encoder.values(), 0)
    elif mode == "regular_special":
        type_by_id = {}
        for tok, tok_id in self.encoder.items():
            type_by_id[tok_id] = 0 if tok in self.special_tokens else 1
    elif mode == "aa":
        # Amino acids first, the stop symbol last; type ids are 1-based positions here.
        aa_order = list(self.amino_acids + ["*"])
        non_codon_type = len(self.amino_acids) + 2
        type_by_id = {}
        for tok, tok_id in self.encoder.items():
            if tok in self.special_tokens:
                type_by_id[tok_id] = 0
            elif tok in self.codons:
                type_by_id[tok_id] = aa_order.index(self.codon_to_aa[tok]) + 1
            else:
                type_by_id[tok_id] = non_codon_type
    else:
        raise ValueError(f"Unknown token type mode: {self.token_type_mode}")

    self.token_type_encoder = type_by_id
|
| 162 |
+
|
| 163 |
+
@property
def token_type_vocab_size(self):
    """
    One more than the number of distinct type ids stored in `self.token_type_encoder`.
    """
    distinct_types = set(self.token_type_encoder.values())
    return 1 + len(distinct_types)
|
| 166 |
+
|
| 167 |
+
def get_vocab(self):
    """
    Return a new dict combining `self.encoder` with `self.added_tokens_encoder`.

    Entries from `self.added_tokens_encoder` win when a token appears in both.
    """
    base_vocab = self.encoder
    added_vocab = self.added_tokens_encoder
    return dict(base_vocab, **added_vocab)
|
| 169 |
+
|
| 170 |
+
def _tokenize(self, text):
|
| 171 |
+
"""
|
| 172 |
+
Tokenize a string.
|
| 173 |
+
"""
|
| 174 |
+
text = text.upper()
|
| 175 |
+
tokens = self.compiled_regex.findall(text)
|
| 176 |
+
return tokens
|
| 177 |
+
|
| 178 |
+
def _convert_token_to_id(self, token):
|
| 179 |
+
"""
|
| 180 |
+
Converts a token (str) in an id using the vocab.
|
| 181 |
+
"""
|
| 182 |
+
return self.encoder.get(token, self.encoder[self.unk_token])
|
| 183 |
+
|
| 184 |
+
def _convert_id_to_token(self, index):
|
| 185 |
+
"""
|
| 186 |
+
Converts an index (integer) in a token (str) using the vocab.
|
| 187 |
+
"""
|
| 188 |
+
return self.decoder.get(index, self.unk_token)
|
| 189 |
+
|
| 190 |
+
def convert_tokens_to_string(self, tokens):
    """
    Concatenate `tokens` into one string with no separator between them.
    """
    separator = ""
    return separator.join(tokens)
|
| 195 |
+
|
| 196 |
+
def encode_aa(self, text):
    """
    Encode a DNA/RNA string using the amino acid vocab.

    The text is split with `self._tokenize`. Each codon token is translated to its
    amino-acid letter through `self.codon_to_aa` before the lookup in `self.encoder_aa`.
    Previously codons were looked up directly and therefore always encoded as the
    unknown token. Tokens that are not codons (special tokens, stray characters) are
    looked up unchanged, as before.

    Anything missing from `self.encoder_aa` maps to the unknown-token id. This includes
    stop codons when "*" is not a key of `self.encoder_aa`.

    Args:
        text (`str`): Raw coding sequence.

    Returns:
        `List[int]`: One amino-acid vocabulary id per token.
    """
    unk_id = self.encoder_aa[self.unk_token]
    return [
        self.encoder_aa.get(self.codon_to_aa.get(token, token), unk_id)
        for token in self._tokenize(text)
    ]
|
| 205 |
+
|
| 206 |
+
def get_aa_vocab_size(self):
    """
    Return the number of entries in the amino acid vocab (`self.encoder_aa`).
    """
    aa_vocabulary = self.encoder_aa
    return len(aa_vocabulary)
|
| 208 |
+
|
| 209 |
+
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
    """
    Build model inputs from a sequence by wrapping it in the CLS and SEP token ids.

    Args:
        token_ids_0 (`List[int]`): The first tokenized sequence.
        token_ids_1 (`List[int]`, *optional*): Accepted for interface compatibility; it is not
            used and does not appear in the result.

    Returns:
        `List[int]`: `[cls_token_id] + token_ids_0 + [sep_token_id]`.
    """
    leading = [self.cls_token_id]
    trailing = [self.sep_token_id]
    return leading + token_ids_0 + trailing
|
| 225 |
+
|
| 226 |
+
def get_special_tokens_mask(
    self, token_ids_0, token_ids_1=None, already_has_special_tokens: bool = False
):
    """
    Flag which positions of a sequence hold special tokens.

    An id counts as special when it equals the EOS, PAD, MASK, SEP or CLS token id.

    Args:
        token_ids_0 (`List[int]`):
            List of ids of the first sequence.
        token_ids_1 (`List[int]`, *optional*):
            Accepted for interface compatibility; it is not used.
        already_has_special_tokens (`bool`, *optional*, defaults to `False`):
            Whether or not the token list is already formatted with special tokens for the model.
            When `False`, the mask gets an extra leading and trailing 1.

    Returns:
        A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
    """
    flagged_ids = (
        self.eos_token_id,
        self.pad_token_id,
        self.mask_token_id,
        self.sep_token_id,
        self.cls_token_id,
    )
    inner_mask = [int(token_id in flagged_ids) for token_id in token_ids_0]

    if already_has_special_tokens:
        return inner_mask
    # Account for the two ids that wrap the sequence when special tokens are added.
    return [1] + inner_mask + [1]
|
| 262 |
+
|
| 263 |
+
def create_token_type_ids_from_sequences(self, token_ids_0, token_ids_1=None):
    """
    Create the token type IDs corresponding to the sequence passed. [What are token type
    IDs?](../glossary#token-type-ids)

    Each id is mapped through `self.token_type_encoder`. Ids missing from that mapping get a
    fallback type equal to the number of distinct type ids in the mapping.

    Args:
        token_ids_0 (`List[int]`): The first tokenized sequence.
        token_ids_1 (`List[int]`, *optional*): Accepted for interface compatibility; it is not
            used.

    Returns:
        `List[int]`: The token type ids, one per entry of `token_ids_0`.
    """
    # NOTE(review): the result has exactly len(token_ids_0) entries, whereas
    # build_inputs_with_special_tokens adds a CLS and a SEP id around the same sequence.
    # Confirm that callers expect the shorter length.
    fallback_type = len(set(self.token_type_encoder.values()))
    lookup = self.token_type_encoder.get
    return [lookup(token_id, fallback_type) for token_id in token_ids_0]
|
| 296 |
+
|
| 297 |
+
def save_vocabulary(self, save_directory, filename_prefix=None):
    """
    Write `self.encoder` as JSON to `<save_directory>/<filename_prefix>vocab.json`.

    Only the `encoder` mapping is written; the configuration and special token mappings of the
    tokenizer are not saved by this method.

    Args:
        save_directory (`str`):
            The directory in which to save the vocabulary.
        filename_prefix (`str`, *optional*):
            An optional prefix prepended, with no separator, to the file name. `None` means no
            prefix.

    Returns:
        `Tuple(str)`: Paths to the files saved.
    """
    prefix = "" if filename_prefix is None else filename_prefix
    target_path = os.path.join(save_directory, prefix + "vocab.json")

    with open(target_path, "w") as handle:
        json.dump(self.encoder, handle)

    return (target_path,)
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<CLS>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<SEP>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "<UNK>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<PAD>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"4": {
|
| 36 |
+
"content": "<MASK>",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"auto_map": {
|
| 45 |
+
"AutoTokenizer": [
|
| 46 |
+
"tokenization_decodon.DeCodonTokenizer",
|
| 47 |
+
null
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
"bos_token": "<CLS>",
|
| 51 |
+
"clean_up_tokenization_spaces": true,
|
| 52 |
+
"cls_token": "<CLS>",
|
| 53 |
+
"mask_token": "<MASK>",
|
| 54 |
+
"model_max_length": 2048,
|
| 55 |
+
"pad_token": "<PAD>",
|
| 56 |
+
"sep_token": "<SEP>",
|
| 57 |
+
"tokenizer_class": "DeCodonTokenizer",
|
| 58 |
+
"unk_token": "<UNK>"
|
| 59 |
+
}
|
vocab.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"<CLS>": 0, "<SEP>": 1, "<UNK>": 2, "<PAD>": 3, "<MASK>": 4, "AAA": 5, "AAC": 6, "AAG": 7, "AAT": 8, "ACA": 9, "ACC": 10, "ACG": 11, "ACT": 12, "AGA": 13, "AGC": 14, "AGG": 15, "AGT": 16, "ATA": 17, "ATC": 18, "ATG": 19, "ATT": 20, "CAA": 21, "CAC": 22, "CAG": 23, "CAT": 24, "CCA": 25, "CCC": 26, "CCG": 27, "CCT": 28, "CGA": 29, "CGC": 30, "CGG": 31, "CGT": 32, "CTA": 33, "CTC": 34, "CTG": 35, "CTT": 36, "GAA": 37, "GAC": 38, "GAG": 39, "GAT": 40, "GCA": 41, "GCC": 42, "GCG": 43, "GCT": 44, "GGA": 45, "GGC": 46, "GGG": 47, "GGT": 48, "GTA": 49, "GTC": 50, "GTG": 51, "GTT": 52, "TAA": 53, "TAC": 54, "TAG": 55, "TAT": 56, "TCA": 57, "TCC": 58, "TCG": 59, "TCT": 60, "TGA": 61, "TGC": 62, "TGG": 63, "TGT": 64, "TTA": 65, "TTC": 66, "TTG": 67, "TTT": 68, "<1314>": 69, "<257758>": 70, "<208224>": 71, "<573>": 72, "<439334>": 73, "<1639>": 74, "<562>": 75, "<106648>": 76, "<287>": 77, "<28450>": 78, "<2104>": 79, "<600>": 80, "<727>": 81, "<549>": 82, "<2495445>": 83, "<1313>": 84, "<1680>": 85, "<1929291>": 86, "<2600065>": 87, "<2496766>": 88, "<1351>": 89, "<738>": 90, "<2702>": 91, "<210>": 92, "<741093>": 93, "<195>": 94, "<593905>": 95, "<485>": 96, "<283734>": 97, "<373>": 98, "<470>": 99, "<2781243>": 100, "<1311>": 101, "<273384>": 102, "<520>": 103, "<1307>": 104, "<458197>": 105, "<670>": 106, "<1280>": 107, "<1920116>": 108, "<1185650>": 109, "<74426>": 110, "<1915>": 111, "<347834>": 112, "<90370>": 113, "<487>": 114, "<587>": 115, "<1773>": 116, "<28901>": 117, "<158836>": 118, "<192955>": 119, "<1134687>": 120, "<29486>": 121, "<668>": 122, "<1282>": 123, "<2735528>": 124, "<1613>": 125, "<2944939>": 126, "<1329816>": 127, "<263>": 128, "<159>": 129, "<1203599>": 130, "<1522>": 131, "<2735914>": 132, "<220697>": 133, "<480>": 134, "<1408>": 135, "<59201>": 136, "<68213>": 137, "<666>": 138, "<624>": 139, "<1402>": 140, "<714>": 141, "<55601>": 142, "<1597>": 143, "<59823>": 144, "<1194708>": 145, "<1352>": 146, "<2077273>": 147, "<1502>": 148, "<553>": 
149, "<1590>": 150, "<1747>": 151, "<1346>": 152, "<337>": 153, "<28116>": 154, "<58095>": 155, "<305>": 156, "<1690246>": 157, "<1496>": 158, "<2587012>": 159, "<611>": 160, "<546>": 161, "<28141>": 162, "<2594462>": 163, "<149539>": 164, "<44250>": 165, "<1621261>": 166, "<165179>": 167, "<28118>": 168, "<513160>": 169, "<818>": 170, "<615>": 171, "<197>": 172, "<813>": 173, "<623>": 174, "<92828>": 175, "<827>": 176, "<2026187>": 177, "<2745518>": 178, "<37919>": 179, "<2832373>": 180, "<328813>": 181, "<48>": 182, "<2487710>": 183, "<548>": 184, "<46680>": 185, "<1589>": 186, "<119857>": 187, "<629259>": 188, "<2731692>": 189, "<2920940>": 190, "<1396>": 191, "<731>": 192, "<1512>": 193, "<1540520>": 194, "<511>": 195, "<611301>": 196, "<358>": 197, "<57743>": 198, "<1078034>": 199, "<1552123>": 200, "<2017437>": 201, "<285446>": 202, "<90371>": 203, "<174>": 204, "<72407>": 205, "<371601>": 206, "<152331>": 207, "<393003>": 208, "<47715>": 209, "<1405>": 210, "<36809>": 211, "<1281>": 212, "<244319>": 213, "<672>": 214, "<930091>": 215, "<85569>": 216, "<2020689>": 217, "<1871066>": 218, "<2996033>": 219, "<1406>": 220, "<206662>": 221, "<1423>": 222, "<134601>": 223, "<28214>": 224, "<1464011>": 225, "<40518>": 226, "<747>": 227, "<1717>": 228, "<2590157>": 229, "<318456>": 230, "<154288>": 231, "<155322>": 232, "<2725562>": 233, "<1781>": 234, "<83334>": 235, "<1390>": 236, "<91891>": 237, "<594>": 238, "<87>": 239, "<379347>": 240, "<224729>": 241, "<47853>": 242, "<33043>": 243, "<149390>": 244, "<58712>": 245, "<588>": 246, "<868175>": 247, "<150054>": 248, "<436515>": 249, "<729>": 250, "<28142>": 251, "<2666183>": 252, "<817>": 253, "<2023717>": 254, "<492670>": 255, "<2184566>": 256, "<1485217>": 257, "<1609103>": 258, "<1877>": 259, "<1391993>": 260, "<413503>": 261, "<1359>": 262, "<239935>": 263, "<1354>": 264, "<2940559>": 265, "<739141>": 266, "<1892865>": 267, "<726>": 268, "<552>": 269, "<1358>": 270, "<29497>": 271, "<1823>": 272, "<561879>": 
273, "<2029412>": 274, "<54388>": 275, "<471>": 276, "<580165>": 277, "<2696284>": 278, "<60552>": 279, "<106654>": 280, "<911022>": 281, "<442694>": 282, "<28144>": 283, "<208962>": 284, "<1448310>": 285, "<1001741>": 286, "<38323>": 287, "<2769343>": 288, "<1428>": 289, "<235>": 290, "<103796>": 291, "<1123286>": 292, "<33945>": 293, "<2994668>": 294, "<1074486>": 295, "<329>": 296, "<95486>": 297, "<31998>": 298, "<2070558>": 299, "<1172618>": 300, "<318>": 301, "<87883>": 302, "<1812935>": 303, "<571>": 304, "<384>": 305, "<28150>": 306, "<53345>": 307, "<64187>": 308, "<989370>": 309, "<550>": 310, "<1767>": 311, "<321>": 312, "<1151232>": 313, "<28037>": 314, "<1145106>": 315, "<410942>": 316, "<663>": 317, "<1283>": 318, "<1348>": 319, "<996>": 320, "<820>": 321, "<418223>": 322, "<1981506>": 323, "<1491>": 324, "<2285>": 325, "<1925021>": 326, "<1573941>": 327, "<1547>": 328, "<216816>": 329, "<2587157>": 330, "<43767>": 331, "<29339>": 332, "<1881062>": 333, "<2675880>": 334, "<650164>": 335, "<317>": 336, "<2583588>": 337, "<1585>": 338, "<1306179>": 339, "<60550>": 340, "<251653>": 341, "<3004093>": 342, "<1296536>": 343, "<1144306>": 344, "<1393>": 345, "<1270>": 346, "<101571>": 347, "<2919925>": 348, "<2494701>": 349, "<363569>": 350, "<595>": 351, "<1679>": 352, "<1640>": 353, "<218823>": 354, "<633>": 355, "<325776>": 356, "<319>": 357, "<1296>": 358, "<1328>": 359, "<86102>": 360, "<57706>": 361, "<524884>": 362, "<2885160>": 363, "<29523>": 364, "<881260>": 365, "<985762>": 366, "<35790>": 367, "<54291>": 368, "<42897>": 369, "<1598>": 370, "<2608338>": 371, "<36808>": 372, "<1443902>": 373, "<756892>": 374, "<2699428>": 375, "<487521>": 376, "<584>": 377, "<33038>": 378, "<2819934>": 379, "<713604>": 380, "<1463165>": 381, "<440524>": 382, "<88431>": 383, "<35620>": 384, "<1335620>": 385, "<2654255>": 386, "<1898040>": 387, "<1885032>": 388, "<1602942>": 389, "<29385>": 390, "<1587>": 391, "<2745138>": 392, "<1624>": 393, "<1502999>": 394, 
"<40041>": 395, "<1583>": 396, "<1303>": 397, "<821>": 398, "<2819289>": 399, "<686>": 400, "<299766>": 401, "<1596>": 402, "<50719>": 403, "<2608867>": 404, "<1940621>": 405, "<103232>": 406, "<113571>": 407, "<381>": 408, "<1463164>": 409, "<1308>": 410, "<2877527>": 411, "<1295>": 412, "<1643>": 413, "<340>": 414, "<375>": 415, "<2804533>": 416, "<412384>": 417, "<1768>": 418, "<280505>": 419, "<2952254>": 420, "<1433>": 421, "<2493671>": 422, "<596>": 423, "<35703>": 424, "<69974>": 425, "<2962034>": 426, "<2005388>": 427, "<488447>": 428, "<1450429>": 429, "<213>": 430, "<2769301>": 431, "<115981>": 432, "<54914>": 433, "<108619>": 434, "<592978>": 435, "<1463938>": 436, "<135735>": 437, "<2771199>": 438, "<648>": 439, "<1304>": 440, "<61645>": 441, "<1798>": 442, "<75588>": 443, "<2862949>": 444, "<1954>": 445, "<48296>": 446, "<2782615>": 447, "<1241582>": 448, "<387>": 449, "<1401>": 450, "<40324>": 451, "<1828>": 452, "<1438695>": 453, "<64104>": 454, "<40545>": 455, "<553239>": 456, "<28108>": 457, "<152268>": 458, "<149391>": 459, "<1230389>": 460, "<1962118>": 461, "<2813349>": 462, "<853>": 463, "<1247>": 464, "<76759>": 465, "<1286>": 466, "<332186>": 467, "<1404>": 468, "<2744522>": 469, "<29397>": 470, "<37636>": 471, "<1628392>": 472, "<1719>": 473, "<1055537>": 474, "<228756>": 475, "<2219225>": 476, "<2753541>": 477, "<587753>": 478, "<172045>": 479, "<1390116>": 480, "<201>": 481, "<2992120>": 482, "<1292>": 483, "<1520>": 484, "<1434204>": 485, "<649756>": 486, "<1300225>": 487, "<2562792>": 488, "<1953771>": 489, "<33887>": 490, "<1402556>": 491, "<1138452>": 492, "<2879630>": 493, "<33964>": 494, "<2745499>": 495, "<40215>": 496, "<488731>": 497, "<1116179>": 498, "<980427>": 499, "<2763107>": 500, "<1622>": 501, "<96345>": 502, "<82541>": 503, "<1608996>": 504, "<135487>": 505, "<244366>": 506, "<1886>": 507, "<40318>": 508, "<630>": 509, "<1891097>": 510, "<1398>": 511, "<669>": 512, "<728>": 513, "<1513793>": 514, "<2769423>": 515, 
"<180957>": 516, "<411483>": 517, "<237656>": 518, "<1392>": 519, "<180434>": 520, "<61647>": 521, "<446>": 522, "<1812934>": 523, "<2911965>": 524, "<1946>": 525, "<13690>": 526, "<1920666>": 527, "<1304917>": 528, "<1710>": 529, "<410072>": 530, "<545>": 531, "<2775420>": 532, "<777>": 533, "<1284392>": 534, "<1116028>": 535, "<2842346>": 536, "<868144>": 537, "<1648923>": 538, "<1464079>": 539, "<1293>": 540, "<2681485>": 541, "<680>": 542, "<2780401>": 543, "<406432>": 544, "<86185>": 545, "<221822>": 546, "<310300>": 547, "<339>": 548, "<323>": 549, "<1541988>": 550, "<2744516>": 551, "<299767>": 552, "<136468>": 553, "<768494>": 554, "<192149>": 555, "<2895494>": 556, "<768493>": 557, "<251700>": 558, "<194542>": 559, "<28026>": 560, "<1822258>": 561, "<219574>": 562, "<143221>": 563, "<199>": 564, "<1772>": 565, "<497965>": 566, "<2832371>": 567, "<1131758>": 568, "<674>": 569, "<35814>": 570, "<39496>": 571, "<394935>": 572, "<1403038>": 573, "<2897386>": 574, "<1736585>": 575, "<2589974>": 576, "<269536>": 577, "<1434196>": 578, "<47770>": 579, "<1837343>": 580, "<28198>": 581, "<1329819>": 582, "<52696>": 583, "<1446609>": 584, "<2777958>": 585, "<1806053>": 586, "<380021>": 587, "<364929>": 588, "<2807651>": 589, "<39491>": 590, "<1196095>": 591, "<2692790>": 592, "<632>": 593, "<292462>": 594, "<1912094>": 595, "<154654>": 596, "<316>": 597, "<180863>": 598, "<1764>": 599, "<1531>": 600, "<2690291>": 601, "<2203200>": 602, "<117681>": 603, "<38301>": 604, "<53707>": 605, "<84112>": 606, "<555>": 607, "<2756293>": 608, "<1642>": 609, "<1122917>": 610, "<703>": 611, "<1041153>": 612, "<204042>": 613, "<2563942>": 614, "<359>": 615, "<1144304>": 616, "<911243>": 617, "<2851540>": 618, "<2824890>": 619, "<621>": 620, "<1914933>": 621, "<2527775>": 622, "<1381597>": 623, "<29448>": 624, "<676>": 625, "<1255>": 626, "<279826>": 627, "<881290>": 628, "<1856864>": 629, "<399>": 630, "<208479>": 631, "<1444087>": 632, "<1888>": 633, "<1530123>": 634, "<29459>": 
635, "<1584>": 636, "<1181771>": 637, "<1290>": 638, "<47917>": 639, "<1117645>": 640, "<1503054>": 641, "<29519>": 642, "<29388>": 643, "<91352>": 644, "<1124987>": 645, "<1274>": 646, "<1334>": 647, "<1926307>": 648, "<1765>": 649, "<28035>": 650, "<823>": 651, "<1690>": 652, "<2742680>": 653, "<1625>": 654, "<85698>": 655, "<598>": 656, "<38313>": 657, "<47885>": 658, "<2862678>": 659, "<1287232>": 660, "<1227076>": 661, "<2371>": 662, "<139415>": 663, "<28111>": 664, "<226900>": 665, "<2589934>": 666, "<1705310>": 667, "<1444073>": 668, "<2033487>": 669, "<37931>": 670, "<1341646>": 671, "<2184697>": 672, "<33968>": 673, "<1839785>": 674, "<483908>": 675, "<293387>": 676, "<385966>": 677, "<192954>": 678, "<1349>": 679, "<1850361>": 680, "<519>": 681, "<306>": 682, "<47678>": 683, "<2829161>": 684, "<1436064>": 685, "<1638>": 686, "<1235825>": 687, "<357276>": 688, "<1472416>": 689, "<981384>": 690, "<159612>": 691, "<2340>": 692, "<148942>": 693, "<1639133>": 694, "<173>": 695, "<2782349>": 696, "<133448>": 697, "<75612>": 698, "<1384636>": 699, "<1238450>": 700, "<2976152>": 701, "<204039>": 702, "<33888>": 703, "<1009714>": 704, "<2496669>": 705, "<1882769>": 706, "<2835864>": 707, "<32022>": 708, "<2878385>": 709, "<1854574>": 710, "<43770>": 711, "<1736290>": 712, "<137591>": 713, "<868182>": 714, "<129140>": 715, "<682797>": 716, "<2069256>": 717, "<246787>": 718, "<127906>": 719, "<258533>": 720, "<871665>": 721, "<217203>": 722, "<1619948>": 723, "<1681>": 724, "<43306>": 725, "<138532>": 726, "<75985>": 727, "<1958813>": 728, "<2378>": 729, "<180844>": 730, "<1069448>": 731, "<68223>": 732, "<2717483>": 733, "<44935>": 734, "<2806611>": 735, "<246167>": 736, "<2602015>": 737, "<582>": 738, "<649196>": 739, "<1086033>": 740, "<254785>": 741, "<83333>": 742, "<42881>": 743, "<2209>": 744, "<1192854>": 745, "<1890302>": 746, "<1829>": 747, "<310297>": 748, "<2893891>": 749, "<1796646>": 750, "<1488392>": 751, "<294>": 752, "<1519493>": 753, "<1286181>": 
754, "<2950553>": 755, "<1055538>": 756, "<357441>": 757, "<34085>": 758, "<651>": 759, "<2033499>": 760, "<2782625>": 761, "<702444>": 762, "<83219>": 763, "<1441627>": 764, "<654>": 765, "<2978478>": 766, "<1509>": 767, "<1277687>": 768, "<2541721>": 769, "<710243>": 770, "<1926319>": 771, "<1238>": 772, "<1642299>": 773, "<1748967>": 774, "<46126>": 775, "<56448>": 776, "<1254>": 777, "<293958>": 778, "<1450430>": 779, "<1644131>": 780, "<2744469>": 781, "<182337>": 782, "<47879>": 783, "<2316732>": 784, "<2798820>": 785, "<2200759>": 786, "<104087>": 787, "<656393>": 788, "<300181>": 789, "<76761>": 790, "<1250026>": 791, "<644>": 792, "<189426>": 793, "<554>": 794, "<336306>": 795, "<619300>": 796, "<1637645>": 797, "<1488397>": 798, "<28182>": 799, "<595537>": 800, "<2925845>": 801, "<68239>": 802, "<260799>": 803, "<2135648>": 804, "<2812563>": 805, "<1871053>": 806, "<93064>": 807, "<29382>": 808, "<56459>": 809, "<2605746>": 810, "<68570>": 811, "<222805>": 812, "<575>": 813, "<28172>": 814, "<110321>": 815, "<1661035>": 816, "<2979328>": 817, "<319706>": 818, "<2897332>": 819, "<1281102>": 820, "<564>": 821, "<28132>": 822, "<2692856>": 823, "<1179674>": 824, "<1519492>": 825, "<132265>": 826, "<1353535>": 827, "<1862322>": 828, "<1545443>": 829, "<1661033>": 830, "<1839798>": 831, "<2816456>": 832, "<323284>": 833, "<1324013>": 834, "<83655>": 835, "<2026188>": 836, "<2995158>": 837, "<1882682>": 838, "<319939>": 839, "<2500152>": 840, "<2602016>": 841, "<1053242>": 842, "<1580>": 843, "<40216>": 844, "<260554>": 845, "<1463842>": 846, "<470096>": 847, "<44752>": 848, "<292>": 849, "<2566012>": 850, "<529>": 851, "<457387>": 852, "<33069>": 853, "<1620419>": 854, "<1828643>": 855, "<562971>": 856, "<1194421>": 857, "<558537>": 858, "<2797535>": 859, "<1033813>": 860, "<2798844>": 861, "<376>": 862, "<2607819>": 863, "<2778092>": 864, "<1428624>": 865, "<1297617>": 866, "<285>": 867, "<400667>": 868, "<56458>": 869, "<82979>": 870, "<151783>": 871, 
"<1896961>": 872, "<1444253>": 873, "<46503>": 874, "<2600577>": 875, "<47716>": 876, "<146821>": 877, "<2690350>": 878, "<2530387>": 879, "<33900>": 880, "<2725666>": 881, "<2762559>": 882, "<2026186>": 883, "<488729>": 884, "<1599>": 885, "<1509407>": 886, "<1210076>": 887, "<158822>": 888, "<1468410>": 889, "<1206730>": 890, "<2487355>": 891, "<1314773>": 892, "<1892>": 893, "<2591009>": 894, "<382>": 895, "<1302>": 896, "<47951>": 897, "<1281189>": 898, "<1333585>": 899, "<674529>": 900, "<1283291>": 901, "<2054605>": 902, "<1444256>": 903, "<2810309>": 904, "<150055>": 905, "<38307>": 906, "<1766>": 907, "<300876>": 908, "<1505>": 909, "<96241>": 910, "<2666085>": 911, "<1908205>": 912, "<1305>": 913, "<47880>": 914, "<1834099>": 915, "<108015>": 916, "<1033741>": 917, "<2796387>": 918, "<227321>": 919, "<44283>": 920, "<1310165>": 921, "<228230>": 922, "<2653932>": 923, "<2015170>": 924, "<246432>": 925, "<2306165>": 926, "<1243663>": 927, "<1974212>": 928, "<1703933>": 929, "<214106>": 930, "<1452>": 931, "<1176649>": 932, "<1505087>": 933, "<135461>": 934, "<2881339>": 935, "<29438>": 936, "<400777>": 937, "<1360>": 938, "<1051653>": 939, "<141679>": 940, "<1579>": 941, "<2070566>": 942, "<1150298>": 943, "<1269000>": 944, "<2920306>": 945, "<1718>": 946, "<2728024>": 947, "<61015>": 948, "<645>": 949, "<1504709>": 950, "<865>": 951, "<715>": 952, "<221028>": 953, "<1414720>": 954, "<1438723>": 955, "<1114961>": 956, "<2203208>": 957, "<1750590>": 958, "<1913371>": 959, "<1835702>": 960, "<1312183>": 961, "<1836>": 962, "<1803665>": 963, "<303>": 964, "<1720574>": 965, "<1352941>": 966, "<60894>": 967, "<1828668>": 968, "<90975>": 969, "<2485162>": 970, "<1114943>": 971, "<2037901>": 972, "<146919>": 973, "<486>": 974, "<89065>": 975, "<1981699>": 976, "<260367>": 977, "<74829>": 978, "<71999>": 979, "<1038863>": 980, "<2723074>": 981, "<1446559>": 982, "<342113>": 983, "<57975>": 984, "<67263>": 985, "<1245029>": 986, "<2832364>": 987, "<1405293>": 988, 
"<2518342>": 989, "<2071715>": 990, "<404407>": 991, "<360921>": 992, "<1353>": 993, "<1920191>": 994, "<1993>": 995, "<652611>": 996, "<44415>": 997, "<483913>": 998, "<1403043>": 999, "<2530454>": 1000, "<1885572>": 1001, "<13373>": 1002, "<51671>": 1003, "<285458>": 1004, "<2316736>": 1005, "<2580423>": 1006, "<350702>": 1007, "<360920>": 1008, "<1478>": 1009, "<1679167>": 1010, "<1764274>": 1011, "<2717488>": 1012, "<2035>": 1013, "<122>": 1014, "<2876625>": 1015, "<2714351>": 1016, "<33013>": 1017, "<2811430>": 1018, "<582475>": 1019, "<110322>": 1020, "<2607817>": 1021, "<1644108>": 1022, "<613905>": 1023, "<2690361>": 1024, "<310355>": 1025, "<2608688>": 1026, "<577680>": 1027, "<2952248>": 1028, "<1288>": 1029, "<208964>": 1030, "<1736531>": 1031, "<2806612>": 1032, "<1905288>": 1033, "<67003>": 1034, "<2496752>": 1035, "<56460>": 1036, "<149385>": 1037, "<1905730>": 1038, "<1402135>": 1039, "<766224>": 1040, "<298>": 1041, "<193462>": 1042, "<1330040>": 1043, "<2706117>": 1044, "<1367451>": 1045, "<2487520>": 1046, "<2487423>": 1047, "<1926495>": 1048, "<1138194>": 1049, "<59893>": 1050, "<2292240>": 1051, "<941280>": 1052, "<1961>": 1053, "<2994442>": 1054, "<90105>": 1055, "<1705110>": 1056, "<60520>": 1057, "<1410652>": 1058, "<754774>": 1059, "<28147>": 1060, "<1151244>": 1061, "<930124>": 1062, "<1193501>": 1063, "<169679>": 1064, "<925125>": 1065, "<2703793>": 1066, "<3018097>": 1067, "<2920352>": 1068, "<665099>": 1069, "<266>": 1070, "<28454>": 1071, "<1005540>": 1072, "<1460450>": 1073, "<1685>": 1074, "<1463905>": 1075, "<1104679>": 1076, "<1176302>": 1077, "<1908235>": 1078, "<28101>": 1079, "<216778>": 1080, "<1715149>": 1081, "<2340917>": 1082, "<337330>": 1083, "<1709002>": 1084, "<94626>": 1085, "<2592816>": 1086, "<876>": 1087, "<1162286>": 1088, "<1882741>": 1089, "<29338>": 1090, "<238>": 1091, "<913090>": 1092, "<2867268>": 1093, "<868178>": 1094, "<66976>": 1095, "<2563970>": 1096, "<2789216>": 1097, "<1218105>": 1098, "<1839709>": 
1099, "<2824811>": 1100, "<1094175>": 1101, "<46228>": 1102, "<1737490>": 1103, "<462227>": 1104, "<629258>": 1105, "<98360>": 1106, "<1053248>": 1107, "<546365>": 1108, "<2986923>": 1109, "<2744518>": 1110, "<29380>": 1111, "<103816>": 1112, "<86664>": 1113, "<1220562>": 1114, "<319705>": 1115, "<2109332>": 1116, "<1703925>": 1117, "<1281011>": 1118, "<1316803>": 1119, "<1265505>": 1120, "<68043>": 1121, "<2135642>": 1122, "<132474>": 1123, "<1394178>": 1124, "<2744515>": 1125, "<2589957>": 1126, "<1446502>": 1127, "<129817>": 1128, "<1833>": 1129, "<69>": 1130, "<2856603>": 1131, "<2184572>": 1132, "<456>": 1133, "<2075556>": 1134, "<436907>": 1135, "<935199>": 1136, "<661>": 1137, "<2607821>": 1138, "<2066047>": 1139, "<406341>": 1140, "<2719034>": 1141, "<1850238>": 1142, "<1309>": 1143, "<1342299>": 1144, "<137838>": 1145, "<1438684>": 1146, "<28898>": 1147, "<2052592>": 1148, "<2074>": 1149, "<526980>": 1150, "<1848738>": 1151, "<33876>": 1152, "<340188>": 1153, "<984487>": 1154, "<2589980>": 1155, "<1160705>": 1156, "<317013>": 1157, "<79880>": 1158, "<217204>": 1159, "<1674922>": 1160, "<1582>": 1161, "<2070758>": 1162, "<1435371>": 1163, "<868145>": 1164, "<2219865>": 1165, "<451>": 1166, "<67257>": 1167, "<2740297>": 1168, "<1548889>": 1169, "<1116129>": 1170, "<58097>": 1171, "<2736284>": 1172, "<1123262>": 1173, "<1397>": 1174, "<1138189>": 1175, "<29472>": 1176, "<1547922>": 1177, "<2559709>": 1178, "<146922>": 1179, "<1822239>": 1180, "<2742642>": 1181, "<1392869>": 1182, "<398578>": 1183, "<1284792>": 1184, "<2876624>": 1185, "<367791>": 1186, "<1841656>": 1187, "<2886093>": 1188, "<2703892>": 1189, "<56>": 1190, "<1207504>": 1191, "<1395925>": 1192, "<1400979>": 1193, "<47877>": 1194, "<2500523>": 1195, "<2692906>": 1196, "<571912>": 1197, "<152480>": 1198, "<1676617>": 1199, "<41978>": 1200, "<2690294>": 1201, "<46626>": 1202, "<2565368>": 1203, "<2824900>": 1204, "<1210066>": 1205, "<1476877>": 1206, "<2895489>": 1207, "<2767486>": 1208, 
"<169430>": 1209, "<2998080>": 1210, "<1089444>": 1211, "<2898149>": 1212, "<1761754>": 1213, "<2070644>": 1214, "<1446728>": 1215, "<2485166>": 1216, "<1181780>": 1217, "<286783>": 1218, "<1736411>": 1219, "<25>": 1220, "<1862316>": 1221, "<83262>": 1222, "<450367>": 1223, "<67283>": 1224, "<350058>": 1225, "<1885029>": 1226, "<1281005>": 1227, "<2320864>": 1228, "<310354>": 1229, "<1306418>": 1230, "<363952>": 1231, "<58343>": 1232, "<665959>": 1233, "<1928>": 1234, "<386>": 1235, "<2665544>": 1236, "<2690338>": 1237, "<486994>": 1238, "<1318>": 1239, "<1444111>": 1240, "<936565>": 1241, "<1128964>": 1242, "<2185284>": 1243, "<1855658>": 1244, "<2928855>": 1245, "<1446499>": 1246, "<2711209>": 1247, "<659>": 1248, "<29363>": 1249, "<1281116>": 1250, "<1768797>": 1251, "<29384>": 1252, "<396>": 1253, "<1848900>": 1254, "<1499973>": 1255, "<302911>": 1256, "<2928687>": 1257, "<1053236>": 1258, "<2497436>": 1259, "<48409>": 1260, "<1444123>": 1261, "<572264>": 1262, "<374606>": 1263, "<1682>": 1264, "<33059>": 1265, "<67593>": 1266, "<2006091>": 1267, "<1871054>": 1268, "<985665>": 1269, "<1412828>": 1270, "<1129896>": 1271, "<2613840>": 1272, "<2949648>": 1273, "<564198>": 1274, "<2706106>": 1275, "<70775>": 1276, "<1930280>": 1277, "<31234>": 1278, "<115778>": 1279, "<1827300>": 1280, "<244566>": 1281, "<2782627>": 1282, "<2970734>": 1283, "<1774>": 1284, "<1806496>": 1285, "<1420013>": 1286, "<1463900>": 1287, "<757414>": 1288, "<34073>": 1289, "<1284809>": 1290, "<2675851>": 1291, "<2723402>": 1292, "<1485682>": 1293, "<1703924>": 1294, "<516466>": 1295, "<1736262>": 1296, "<44010>": 1297, "<2782676>": 1298, "<1677857>": 1299, "<321984>": 1300, "<2608354>": 1301, "<1076926>": 1302, "<1165950>": 1303, "<2769285>": 1304, "<2719586>": 1305, "<2589915>": 1306, "<2755585>": 1307, "<2906758>": 1308, "<2952238>": 1309, "<565556>": 1310, "<154046>": 1311, "<454136>": 1312, "<321662>": 1313, "<1581705>": 1314, "<1465>": 1315, "<2961896>": 1316, "<2569763>": 1317, 
"<1806509>": 1318, "<34>": 1319, "<660027>": 1320, "<2751350>": 1321, "<1438689>": 1322, "<2806347>": 1323, "<2070666>": 1324, "<1535203>": 1325, "<2045>": 1326, "<669365>": 1327, "<49283>": 1328, "<2133767>": 1329, "<1777867>": 1330, "<68171>": 1331, "<43992>": 1332, "<749527>": 1333, "<1038844>": 1334, "<328812>": 1335, "<549687>": 1336, "<2829823>": 1337, "<930772>": 1338, "<2952230>": 1339, "<2070677>": 1340, "<656417>": 1341, "<88382>": 1342, "<2806102>": 1343, "<280145>": 1344, "<1637848>": 1345, "<1450516>": 1346, "<2682474>": 1347, "<218284>": 1348, "<2940596>": 1349, "<2745510>": 1350, "<1911>": 1351, "<1231522>": 1352, "<2742649>": 1353, "<2563964>": 1354, "<2775329>": 1355, "<301302>": 1356, "<37331>": 1357, "<2496664>": 1358, "<285676>": 1359, "<1628086>": 1360, "<2749083>": 1361, "<408>": 1362, "<1434927>": 1363, "<1080179>": 1364, "<2600619>": 1365, "<1535768>": 1366, "<1573406>": 1367, "<2681306>": 1368, "<1328367>": 1369, "<2774336>": 1370, "<754504>": 1371, "<2509717>": 1372, "<1355477>": 1373, "<1348630>": 1374, "<565073>": 1375, "<2982024>": 1376, "<488446>": 1377, "<1582493>": 1378, "<528191>": 1379, "<1116047>": 1380, "<1458253>": 1381, "<1183401>": 1382, "<132264>": 1383, "<66887>": 1384, "<251721>": 1385, "<2841646>": 1386, "<2923195>": 1387, "<1650438>": 1388, "<2563963>": 1389, "<2952244>": 1390, "<1871043>": 1391, "<1940289>": 1392, "<569862>": 1393, "<206163>": 1394, "<1778678>": 1395, "<2586989>": 1396, "<1438717>": 1397, "<1377993>": 1398, "<152142>": 1399, "<2908922>": 1400, "<1120623>": 1401, "<1187904>": 1402, "<202956>": 1403, "<447093>": 1404, "<1339266>": 1405, "<1415560>": 1406, "<1331664>": 1407, "<1290417>": 1408, "<1444156>": 1409, "<2769272>": 1410, "<2912190>": 1411, "<2920318>": 1412, "<2740518>": 1413, "<2033>": 1414, "<1844>": 1415, "<2886510>": 1416, "<2585753>": 1417, "<2528024>": 1418, "<2758570>": 1419, "<1245417>": 1420, "<2690446>": 1421, "<3020863>": 1422, "<1194400>": 1423, "<2775058>": 1424, "<163603>": 1425, 
"<2810310>": 1426, "<2709302>": 1427, "<58096>": 1428, "<670580>": 1429, "<1747223>": 1430, "<1503055>": 1431, "<1138383>": 1432, "<1901>": 1433, "<2738811>": 1434, "<2587052>": 1435, "<1699311>": 1436, "<2058923>": 1437, "<2846779>": 1438, "<453247>": 1439, "<2259035>": 1440, "<2823469>": 1441, "<251722>": 1442, "<1444258>": 1443, "<2920294>": 1444, "<9925>": 1445, "<577>": 1446, "<2692792>": 1447, "<2053030>": 1448, "<37332>": 1449, "<1736315>": 1450, "<2841064>": 1451, "<96344>": 1452, "<2995310>": 1453, "<1333544>": 1454, "<1265490>": 1455, "<1402967>": 1456, "<2478958>": 1457, "<37329>": 1458, "<2873260>": 1459, "<2024833>": 1460, "<2782641>": 1461, "<707209>": 1462, "<422271>": 1463, "<1444202>": 1464, "<1211807>": 1465, "<2759022>": 1466, "<1682204>": 1467, "<2023145>": 1468, "<1201031>": 1469, "<2172652>": 1470, "<56192>": 1471, "<569>": 1472, "<2800325>": 1473, "<2070676>": 1474, "<53408>": 1475, "<54571>": 1476, "<320389>": 1477, "<35762>": 1478, "<340190>": 1479, "<134375>": 1480, "<2498848>": 1481, "<2589971>": 1482, "<1350469>": 1483, "<1974323>": 1484, "<1294138>": 1485, "<622>": 1486, "<2725477>": 1487, "<2876644>": 1488, "<1777138>": 1489, "<2589918>": 1490, "<768528>": 1491, "<2903807>": 1492, "<66888>": 1493, "<2738808>": 1494, "<1795>": 1495, "<1967501>": 1496, "<2723094>": 1497, "<5698>": 1498, "<2876634>": 1499, "<1005538>": 1500, "<2705548>": 1501, "<1181745>": 1502, "<2917756>": 1503, "<1053196>": 1504, "<2058152>": 1505, "<1810943>": 1506, "<985002>": 1507, "<2022496>": 1508, "<1355015>": 1509, "<1664069>": 1510, "<2202417>": 1511, "<556534>": 1512, "<2856166>": 1513, "<2797524>": 1514, "<2587864>": 1515, "<28095>": 1516, "<754332>": 1517, "<1487953>": 1518, "<1725256>": 1519, "<2782658>": 1520, "<47883>": 1521, "<1172184>": 1522, "<2058307>": 1523, "<82689>": 1524, "<2604469>": 1525, "<1898961>": 1526, "<191391>": 1527, "<2989731>": 1528, "<1938810>": 1529, "<1981736>": 1530, "<360922>": 1531, "<2824877>": 1532, "<1173023>": 1533, 
"<1926588>": 1534, "<330879>": 1535, "<1173585>": 1536, "<1160721>": 1537, "<1389713>": 1538, "<2211212>": 1539, "<1136218>": 1540, "<1218074>": 1541, "<263849>": 1542, "<158877>": 1543, "<1314786>": 1544, "<1233231>": 1545, "<2605731>": 1546, "<2933326>": 1547, "<1940610>": 1548, "<2795130>": 1549, "<284031>": 1550, "<1967781>": 1551, "<2796469>": 1552, "<575193>": 1553, "<2714954>": 1554, "<284579>": 1555, "<2939573>": 1556, "<2861196>": 1557, "<2782638>": 1558, "<1869337>": 1559, "<2717699>": 1560, "<1446690>": 1561, "<2929851>": 1562, "<2782665>": 1563, "<1328760>": 1564, "<2728848>": 1565, "<1737066>": 1566, "<1904807>": 1567, "<1446645>": 1568, "<2789292>": 1569, "<2820809>": 1570, "<1053215>": 1571, "<215803>": 1572, "<2070652>": 1573, "<1736434>": 1574, "<32009>": 1575, "<1029986>": 1576, "<1335322>": 1577, "<1446675>": 1578, "<264730>": 1579, "<33035>": 1580, "<421767>": 1581, "<319709>": 1582, "<1869>": 1583, "<36807>": 1584, "<483687>": 1585, "<2035198>": 1586, "<1446663>": 1587, "<358708>": 1588, "<2994968>": 1589, "<1790044>": 1590, "<1443889>": 1591, "<930166>": 1592, "<200452>": 1593, "<336820>": 1594, "<2184563>": 1595, "<291644>": 1596, "<67260>": 1597, "<617145>": 1598, "<2741326>": 1599, "<2994970>": 1600, "<51201>": 1601, "<2920348>": 1602, "<1353890>": 1603, "<2745936>": 1604, "<2033502>": 1605, "<2603897>": 1606, "<1215099>": 1607, "<1328423>": 1608, "<648995>": 1609, "<1217282>": 1610, "<80868>": 1611, "<67350>": 1612, "<418784>": 1613, "<2528176>": 1614, "<2022519>": 1615, "<2684233>": 1616, "<1100828>": 1617, "<2787141>": 1618, "<121627>": 1619, "<2926010>": 1620, "<92933>": 1621, "<2738831>": 1622, "<2589893>": 1623, "<2849174>": 1624, "<1938860>": 1625, "<885040>": 1626, "<200454>": 1627, "<763782>": 1628, "<1444118>": 1629, "<2807188>": 1630, "<1280390>": 1631, "<2587068>": 1632, "<1206731>": 1633, "<2024861>": 1634, "<2302925>": 1635, "<2853165>": 1636, "<1444199>": 1637, "<2777969>": 1638, "<756982>": 1639, "<304899>": 1640, 
"<2202895>": 1641, "<54913>": 1642, "<2070575>": 1643, "<2057004>": 1644, "<2807622>": 1645, "<2823685>": 1646, "<2738978>": 1647, "<2821093>": 1648, "<42239>": 1649, "<1380770>": 1650, "<390989>": 1651, "<930090>": 1652, "<68219>": 1653, "<2107692>": 1654, "<1260625>": 1655, "<2876651>": 1656, "<2512216>": 1657, "<1519473>": 1658, "<2665486>": 1659, "<2665470>": 1660, "<36856>": 1661, "<1637836>": 1662, "<2692814>": 1663, "<86668>": 1664, "<70448>": 1665, "<76760>": 1666, "<2508874>": 1667, "<1665556>": 1668, "<2782654>": 1669, "<2709663>": 1670, "<2875709>": 1671, "<2512180>": 1672, "<487174>": 1673, "<2690320>": 1674, "<2202822>": 1675, "<1208103>": 1676, "<1281043>": 1677, "<2070588>": 1678, "<1736573>": 1679, "<2963091>": 1680, "<2816244>": 1681, "<46231>": 1682, "<1210063>": 1683, "<1907195>": 1684, "<1094170>": 1685, "<138277>": 1686, "<1284819>": 1687, "<1938>": 1688, "<2742660>": 1689, "<1328421>": 1690, "<2656787>": 1691, "<279058>": 1692, "<1353891>": 1693, "<65048>": 1694, "<119219>": 1695, "<2807722>": 1696, "<361572>": 1697, "<1834078>": 1698, "<1415562>": 1699, "<868156>": 1700, "<32002>": 1701, "<1967636>": 1702, "<2739487>": 1703, "<1528880>": 1704, "<2491679>": 1705, "<1391654>": 1706, "<1403028>": 1707, "<1667327>": 1708, "<1535636>": 1709, "<2558286>": 1710, "<28099>": 1711, "<2802176>": 1712, "<2933816>": 1713, "<2994495>": 1714, "<180867>": 1715, "<2064767>": 1716, "<984195>": 1717, "<2767463>": 1718, "<2817892>": 1719, "<1303090>": 1720, "<2793803>": 1721, "<2735913>": 1722, "<1494448>": 1723, "<1316147>": 1724, "<354356>": 1725, "<2527990>": 1726, "<2020326>": 1727, "<2782628>": 1728, "<352211>": 1729, "<1418242>": 1730, "<211759>": 1731, "<2920359>": 1732, "<1339265>": 1733, "<9606>": 1734, "<10090>": 1735}
|