mashirong
committed on
Commit
·
53268db
1
Parent(s):
34397f9
Remove unused file
Browse files- tokenization_deepseek.py +0 -328
tokenization_deepseek.py
DELETED
|
@@ -1,328 +0,0 @@
|
|
| 1 |
-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
| 2 |
-
#
|
| 3 |
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
-
# you may not use this file except in compliance with the License.
|
| 5 |
-
# You may obtain a copy of the License at
|
| 6 |
-
#
|
| 7 |
-
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
-
#
|
| 9 |
-
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
-
# See the License for the specific language governing permissions and
|
| 13 |
-
# limitations under the License.
|
| 14 |
-
|
| 15 |
-
"""
|
| 16 |
-
Forked from the file src/transformers/models/bert_generation/tokenization_bert_generation.py from the HuggingFace Transformers library.
|
| 17 |
-
Permalink: https://github.com/huggingface/transformers/blob/04ab5605fbb4ef207b10bf2772d88c53fc242e83/src/transformers/models/bert_generation/tokenization_bert_generation.py
|
| 18 |
-
Tokenizer class for ReplitLM
|
| 19 |
-
Class is modified for compatibility with custom vocabulary and to achieve desired encode/decode behavior for Replit Code V1 3B model.
|
| 20 |
-
"""
|
| 21 |
-
import os
|
| 22 |
-
import sentencepiece as spm
|
| 23 |
-
from sentencepiece import SentencePieceProcessor
|
| 24 |
-
from shutil import copyfile
|
| 25 |
-
from transformers import PreTrainedTokenizer
|
| 26 |
-
from typing import Any, Dict, List, Optional, Tuple
|
| 27 |
-
import base64
|
| 28 |
-
|
| 29 |
-
VOCAB_FILES_NAMES = {'vocab_file': 'spiece.model'}
|
| 30 |
-
|
| 31 |
-
class Tokenizer:
    """Thin wrapper around a SentencePiece model with BOS/EOS framing."""

    def __init__(self, model_path="/weka-jd/prod/deepseek/permanent/shared/mingchuan/llama_data/tokenizer.model"):
        # Load the serialized SentencePiece model from disk.
        assert os.path.isfile(model_path), model_path
        self.sp_model = SentencePieceProcessor(model_file=model_path)

        # Cache the vocabulary size and the special-token ids.
        self.n_words: int = self.sp_model.vocab_size()
        self.bos_id: int = self.sp_model.bos_id()
        self.eos_id: int = self.sp_model.eos_id()
        self.pad_id: int = self.sp_model.pad_id()
        assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()

    def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
        """Encode *s* to token ids, optionally framed with BOS and/or EOS."""
        assert type(s) is str
        ids = self.sp_model.encode(s)
        prefix = [self.bos_id] if bos else []
        suffix = [self.eos_id] if eos else []
        return prefix + ids + suffix

    def decode(self, t: List[int]) -> str:
        """Decode a list of token ids back into text."""
        return self.sp_model.decode(t)
|
| 65 |
-
|
| 66 |
-
class LineBBPETokenizer(Tokenizer):
    """Byte-level BPE tokenizer that maps raw bytes onto a 256-character
    placeholder alphabet before running SentencePiece, and that encodes and
    decodes text line by line (split on ``'\\n'``).

    Args:
        model_path: Path to the SentencePiece ``.model`` file.
        ignore_decode_err: If True, a line whose bytes do not form valid
            UTF-8 decodes to the empty string instead of raising
            ``UnicodeDecodeError``.
        attachfile_path: Directory containing ``byteVocab.txt`` (the
            byte-value <-> placeholder-character table) and ``all_punct.txt``
            (punctuation kept verbatim). Must be provided; the default of
            ``None`` will fail on path concatenation.
    """

    def __init__(self,
                 model_path="/3fs-jd/prod/deepseek/shared/daidamai/data/bbpe/spm_0717_final/100000/bbpe_full_bytes.model",
                 ignore_decode_err=False, attachfile_path=None):
        super().__init__(model_path=model_path)
        self.ignore_decode_err = ignore_decode_err
        Bvocab_path = attachfile_path + "/byteVocab.txt"
        punct_path = attachfile_path + "/all_punct.txt"

        # Punctuation entries that bypass the byte mapping, one per line.
        self.punct = []
        with open(punct_path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line:
                    self.punct.append(line)

        self.numchars = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
        self.white_space = [' ']
        self.special_chars = set(self.numchars) | set(self.punct) | set(self.white_space)

        # Drop any "special" characters that the model would encode to the
        # unk id (0): those must go through the byte mapping instead.
        # NOTE(review): ``update(ch)`` adds the individual characters of a
        # multi-character punctuation entry, so such an entry itself would
        # not be removed from special_chars — confirm punct entries are
        # single characters.
        unk_ch = set()
        for ch in self.special_chars:
            ids = self.sp_model.encode(ch)
            if 0 in ids:
                unk_ch.update(ch)
        self.special_chars = self.special_chars - unk_ch

        # Byte value <-> placeholder character tables (first 256 rows of
        # byteVocab.txt, tab-separated "<byte>\t<char>").
        # Fix: read the file inside a context manager — the original left
        # the handle open for the life of the process (resource leak).
        self.byte2ch = [-1] * 256
        self.ch2byte = {}
        with open(Bvocab_path, 'r', encoding='utf-8') as Bvocab:
            for line in list(Bvocab.readlines())[:256]:
                tokens = line.strip().split('\t')
                self.byte2ch[int(tokens[0])] = tokens[1]
                self.ch2byte[tokens[1]] = int(tokens[0])

        # Hex digit tables: value -> digit ('0'-'9','A'-'F') and back.
        self.b16_dec = {}
        self.b16_enc = ['x'] * 16
        for i in range(10):
            self.b16_dec[str(i)] = i
            self.b16_enc[i] = str(i)
        for i, c in enumerate('ABCDEF', start=10):
            self.b16_dec[c] = i
            self.b16_enc[i] = c

        # Token id that represents a bare newline after byte mapping.
        self.new_line_id = self.sp_model.encode(self.mapping_raw_to_256ch('\n'))[-1]

    def base16encode(self, n):
        """Render a byte value 0-255 as two uppercase hex digits."""
        return self.b16_enc[n // 16] + self.b16_enc[n % 16]

    def base16decode(self, s):
        """Parse two uppercase hex digits into a byte value."""
        return self.b16_dec[s[0]] * 16 + self.b16_dec[s[1]]

    def mapping_raw_to_256ch(self, s: str) -> str:
        """Map raw text onto the 256-character placeholder alphabet.

        Characters in ``special_chars`` pass through unchanged; every other
        character is UTF-8 encoded and each byte replaced by its
        placeholder character from ``byte2ch``.
        """
        mapped_s = []
        for token in s:
            if token in self.special_chars:
                mapped_s.append(token)
                continue
            # b16encode gives b'XX..'; str()[2:-1] strips the b'...' wrapper.
            tk = str(base64.b16encode(token.encode("utf-8")))[2:-1]
            num = len(tk) // 2
            for i in range(num):
                mapped_s.append(self.byte2ch[(self.base16decode(tk[2 * i:2 * i + 2]))])
        return ''.join(mapped_s)

    def mapping_256ch_to_raw(self, s: str) -> str:
        """Inverse of :meth:`mapping_raw_to_256ch`: placeholder text -> raw text.

        Raises ``UnicodeDecodeError`` on invalid UTF-8 byte sequences unless
        ``ignore_decode_err`` is set, in which case the result is ``''``.
        """
        mapped_s = ''
        for token in s:
            if token in self.ch2byte:
                mapped_s += self.base16encode(self.ch2byte[token])
            else:
                # Pass-through character: re-encode its own UTF-8 bytes.
                mapped_s += str(base64.b16encode(token.encode("utf-8")))[2:-1]
        # Decode the accumulated hex string back to text.
        byte_s = bytes.fromhex(mapped_s)
        if self.ignore_decode_err:
            try:
                mapped_s = byte_s.decode('utf-8')
            except UnicodeDecodeError:
                mapped_s = ''
        else:
            mapped_s = byte_s.decode('utf-8')
        return mapped_s

    def encode_line(self, s):
        """Encode a single line (which may end in ``'\\n'``) to token ids."""
        if s == '\n':
            return [self.new_line_id]
        ss = self.mapping_raw_to_256ch(s)
        t = self.sp_model.encode(ss)
        return t

    def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
        """Encode *s* line by line, optionally framed with BOS and/or EOS."""
        assert type(s) is str
        t = []
        lines = s.split('\n')
        n_lines = len(lines)
        for i in range(n_lines):
            # Re-attach the newline consumed by split() on all but the last line.
            if i != n_lines - 1:
                line = lines[i] + '\n'
            else:
                line = lines[i]
            tt = self.encode_line(line)
            t += tt
        if bos:
            t = [self.bos_id] + t
        if eos:
            t = t + [self.eos_id]
        return t

    def get_restored_white_space(self, t):
        """Return ``' '`` if a leading whitespace was consumed by
        SentencePiece when the first tokens of *t* were encoded, else ``''``.

        Detection: decode the first (up to) 3 tokens and re-encode; if the
        round trip produced fewer tokens, a leading space was swallowed.
        """
        t = t[:3]
        if t[0] == self.bos_id:
            t = t[1:]
        decoded = self.sp_model.decode(t)
        encoded = self.sp_model.encode(decoded)
        if len(encoded) < len(t):
            return ' '
        else:
            return ''

    def decode_line(self, t):
        """Decode the token ids of a single line back to raw text."""
        if len(t) == 1 and t[0] == self.new_line_id:
            return '\n'
        # SentencePiece consumes a single leading whitespace; restore it.
        restored_white_space = self.get_restored_white_space(t)
        ss = self.sp_model.decode(t)
        s = restored_white_space + self.mapping_256ch_to_raw(ss)
        return s

    def decode(self, t: List[int]) -> str:
        """Decode token ids to text, splitting on newline tokens so each
        line is decoded independently."""
        s = ''
        new_line_indices = [index for index, value in enumerate(t) if value == self.new_line_id]
        last_idx = 0
        for i in range(len(new_line_indices)):
            line_id = t[last_idx:new_line_indices[i] + 1]
            ss = self.decode_line(line_id)
            s += ss
            last_idx = new_line_indices[i] + 1
        # Trailing tokens after the last newline, if any.
        if last_idx < len(t):
            line_id = t[last_idx:]
            ss = self.decode_line(line_id)
            s += ss
        return s

    def add_special(self, special_tokens):
        """Add *special_tokens* to the model as USER_DEFINED pieces (type 4).

        Fix: the original referenced ``sp_pb2_model`` without any import in
        the file (the import was commented out), so this method always
        raised ``NameError``. Import the protobuf module from the already
        required ``sentencepiece`` package locally.
        """
        from sentencepiece import sentencepiece_model_pb2 as sp_pb2_model
        spm_proto = sp_pb2_model.ModelProto()
        spm_proto.ParseFromString(self.sp_model.serialized_model_proto())
        for special_token in special_tokens:
            new_p = sp_pb2_model.ModelProto().SentencePiece()
            # Map the token through the byte alphabet so it matches the
            # model's internal representation.
            new_p.piece = self.mapping_raw_to_256ch(special_token)
            new_p.score = 0.0
            new_p.type = 4
            spm_proto.pieces.append(new_p)
            print(f'special token added: {special_token}')
        self.sp_model.LoadFromSerializedProto(spm_proto.SerializeToString())
|
| 233 |
-
|
| 234 |
-
class DeepSeekTokenizer(PreTrainedTokenizer):
    """
    Construct a DeepSeek tokenizer backed by :class:`LineBBPETokenizer`.
    Based on [SentencePiece](https://github.com/google/sentencepiece).
    This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods.

    Args:
        vocab_file (`str`):
            [SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that
            contains the vocabulary necessary to instantiate a tokenizer.
        eos_token (`str`, *optional*, defaults to `"</s>"`):
            The end of sequence token.
        bos_token (`str`, *optional*, defaults to `"<s>"`):
            The begin of sequence token.
        unk_token (`str`, *optional*, defaults to `None`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        pad_token (`str`, *optional*, defaults to `None`):
            The token used for padding, for example when batching sequences of different lengths.
        sp_model_kwargs (`dict`, *optional*):
            Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for
            SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things,
            to set:
            - `enable_sampling`: Enable subword regularization.
            - `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout.
              - `nbest_size = {0,1}`: No sampling is performed.
              - `nbest_size > 1`: samples from the nbest_size results.
              - `nbest_size < 0`: assuming that nbest_size is infinite and samples from the all hypothesis (lattice)
                using forward-filtering-and-backward-sampling algorithm.
            - `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
              BPE-dropout.
        name_or_path (`str`, *optional*):
            Directory containing `tokenizer.model` plus the byte-vocab /
            punctuation attachment files used by `LineBBPETokenizer`.
    """
    vocab_files_names = VOCAB_FILES_NAMES
    prefix_tokens: List[int] = []
    model_input_names = ['input_ids', 'attention_mask']

    def __init__(self, vocab_file, bos_token="<s>", eos_token='</s>', unk_token=None, pad_token=None, sep_token='</s>', sp_model_kwargs: Optional[Dict[str, Any]]=None, name_or_path=None, **kwargs) -> None:
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        super().__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, pad_token=pad_token, sep_token=sep_token, sp_model_kwargs=self.sp_model_kwargs, **kwargs)
        # The model directory also hosts the attachment files that
        # LineBBPETokenizer needs (byteVocab.txt, all_punct.txt).
        vocab_path = name_or_path
        print("vocab_path: ", vocab_path)
        self.vocab_path = vocab_path
        self.vocab_file = vocab_path + '/tokenizer.model'
        self.token = LineBBPETokenizer(model_path=self.vocab_file, attachfile_path=vocab_path, ignore_decode_err=True)

    @property
    def vocab_size(self):
        """Size of the underlying SentencePiece vocabulary."""
        return self.token.sp_model.get_piece_size()

    def get_vocab(self):
        """Return the full token -> id mapping, including added tokens."""
        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def __getstate__(self):
        # The wrapped SentencePiece tokenizer is not picklable; drop it and
        # rebuild it in __setstate__ from the stored paths.
        state = self.__dict__.copy()
        state['token'] = None
        return state

    def __setstate__(self, d):
        self.__dict__ = d
        # Backward compatibility with pickles created before sp_model_kwargs.
        if not hasattr(self, 'sp_model_kwargs'):
            self.sp_model_kwargs = {}
        self.token = LineBBPETokenizer(model_path=self.vocab_file, attachfile_path=self.vocab_path)

    def _tokenize(self, text: str) -> List[str]:
        """Take as input a string and return a list of strings (tokens) for words/sub-words"""
        token_ids = self.token.encode(text, bos=True, eos=False)
        string_tokens = [self._convert_id_to_token(token_id) for token_id in token_ids]
        return string_tokens

    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""
        return self.token.sp_model.piece_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        token = self.token.sp_model.id_to_piece(index)
        return token

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string."""
        ids = [self._convert_token_to_id(token) for token in tokens]
        return self.token.decode(ids)

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str]=None) -> Tuple[str]:
        """Copy (or serialize) the SentencePiece model into *save_directory*.

        Returns a 1-tuple with the path of the written vocab file.
        Raises ``ValueError`` if *save_directory* is not a directory.
        """
        if not os.path.isdir(save_directory):
            raise ValueError(f'Vocabulary path ({save_directory}) should be a directory')
        out_vocab_file = os.path.join(save_directory, (filename_prefix + '-' if filename_prefix else '') + VOCAB_FILES_NAMES['vocab_file'])
        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
            copyfile(self.vocab_file, out_vocab_file)
        elif not os.path.isfile(self.vocab_file):
            with open(out_vocab_file, 'wb') as fi:
                # Fix: this class never sets ``self.sp_model`` — the
                # SentencePiece model lives on the wrapped tokenizer, so the
                # original ``self.sp_model`` raised AttributeError here.
                content_spiece_model = self.token.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)
        return (out_vocab_file,)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|