Spaces:
Running
Running
Delete latex_processor.py
Browse files- latex_processor.py +0 -581
latex_processor.py
DELETED
|
@@ -1,581 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import re
|
| 3 |
-
import json
|
| 4 |
-
import shutil
|
| 5 |
-
import logging
|
| 6 |
-
import numpy as np
|
| 7 |
-
from PIL import Image
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
# Regex patterns for pure-structure tokens (braces, environments, sub/superscript
# markers, rules/lines, bracketed lengths) that never receive their own color/bbox.
SKIP_PATTERNS = [r'\{', r'\}', r'[\[\]]', r'\\begin\{.*?\}', r'\\end\{.*?\}', r'\^', r'\_', r'\\.*rule.*', r'\\.*line.*', r'\[[\-.0-9]+[epm][xtm]\]']
# Literal tokens skipped outright when colorizing (alignment, spacing, invisible glue).
SKIP_Tokens = ['\\', '\\\\', '\\index', '\\a', '&', '$', '\\multirow', '\\def', '\\edef', '\\raggedright', '\\url', '\\cr', '\\ensuremath', '\\left', '\\right',
               '\\mathchoice', '\\scriptstyle', '\\displaystyle', '\\qquad', '\\quad', '\\,', '\\!', '~', '\\boldmath', '\\gdef', '\\today', '\\the']
# Commands with no visible output of their own; their braced argument is skipped too.
PHANTOM_Tokens = ['\\fontfamily', '\\vphantom', '\\phantom', '\\rowcolor', '\\ref', '\\thesubequation', '\\global', '\\theboldgroup']
# Commands taking two braced arguments, both rendered (e.g. \frac{a}{b}).
TWO_Tail_Tokens = ['\\frac', '\\binom']
AB_Tail_Tokens = ['\\xrightarrow', '\\xleftarrow', '\\sqrt']  # special token \xxx [] {} — optional [..] plus mandatory {..}
# Two-argument commands whose own glyphs are invisible (only their arguments render).
TWO_Tail_Invisb_Tokens = ['\\overset', '\\underset', '\\stackrel']
# One-argument accents/decorations; the command's own mark is rendered as well.
ONE_Tail_Tokens = ['\\widetilde', '\\overline', '\\hat', '\\widehat', '\\tilde', '\\Tilde', '\\dot', '\\bar', '\\vec', '\\underline', '\\underbrace', '\\check',
                   '\\breve', '\\Bar', '\\Vec', '\\mathring', '\\ddot', '\\Ddot', '\\dddot', '\\ddddot']
# One-argument font/style wrappers that are themselves invisible (only content renders).
ONE_Tail_Invisb_Tokens = ['\\boldsymbol', '\\pmb', '\\textbf', '\\mathrm', '\\mathbf', '\\mathbb', '\\mathcal', '\\textmd', '\\texttt', '\\textnormal',
                          '\\text', '\\textit', '\\textup', '\\mathop', '\\mathbin', '\\smash', '\\operatorname', '\\textrm', '\\mathfrak', '\\emph',
                          '\\textsf', '\\textsc']
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
def flatten_multiline(latex):
    """Flatten a multi-line (array-wrapped) formula into a single "$ ... $" line.

    Strips a surrounding ``\\begin{array} {spec} ... \\end{array}`` wrapper and
    removes line breaks / spacing glue (``\\\\``, ``~``, ``\\qquad``) that occur
    outside any ``\\left...\\right`` delimiter group.

    Args:
        latex: space-tokenized LaTeX string.

    Returns:
        The flattened formula wrapped as ``"$ ... $"``.
    """
    # Matching closer for each flavour of \left delimiter.
    brace_map = {
        "\\left(": "\\right)",
        "\\left[": "\\right]",
        "\\left{": "\\right}",
    }
    l_split = latex.split(' ')
    # Drop the array wrapper; l_split[1] is the column spec, hence [2:].
    if l_split[0] == "\\begin{array}":
        if l_split[-1] == "\\end{array}":
            l_split = l_split[2:-1]
        else:
            l_split = l_split[2:]

    idx = 0
    while idx < len(l_split):
        token = l_split[idx]
        if token.startswith("\\left") and token in brace_map.keys():
            # Jump to the end of the \left...\right group so its interior
            # tokens are preserved verbatim (not stripped below).
            end_idx = find_matching_brace(l_split, idx, brace=[token, brace_map[token]])
            if end_idx != -1:
                idx = end_idx
        elif token in ["\\\\", "~", "\\qquad"]:
            # Remove the break/glue token; step back so the token shifted into
            # this slot is examined on the next iteration.
            l_split = l_split[0:idx] + l_split[idx+1:]
            idx -= 1
        idx += 1
    latex = ' '.join(l_split)
    return "$ "+latex+" $"
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
def clean_latex(text):
    """Collapse inter-token whitespace in a LaTeX string, then restore the
    spaces that are required after a fixed set of commands.

    Args:
        text: space-tokenized LaTeX string.

    Returns:
        The compacted LaTeX string.
    """
    # De-spacing regex (originally GPT-written; passed initial checks but not
    # guaranteed bug-free): drop whitespace flanked by non-backslash characters.
    compact = re.sub(r'(?<=[^\\])\s+(?=[^\\])', '', text)
    # Re-add the spaces that must survive after these commands.
    for cmd in ("\\hline", "\\midrule", "\\times", "\\bf", "\\footnotesize", "\\cr", '\\log'):
        compact = compact.replace(cmd, cmd + " ")
    # The color wrapper must stay glued to the preceding token.
    compact = compact.replace(" \\mathcolor{black}", "\\mathcolor{black}")
    return compact
|
| 60 |
-
|
| 61 |
-
def remove_trailing_latex(formula):
    """Strip trailing spacing/punctuation commands from a formula.

    Removes a run of ``\\hspace{...}``, ``\\vspace{...}``, skip commands,
    ``\\quad``/``\\qquad``, ``\\;``/``\\,``, ``~`` and ``.`` anchored at the
    end of the string.

    Args:
        formula: LaTeX formula string.

    Returns:
        The formula without its trailing layout/punctuation commands.
    """
    trailing = r'(\\(hspace\*?\{[^{}]*?\}|vspace\*?\{[^{}]*?\}|smallskip|medskip|quad|qquad|bigskip|[;,])|\~|\.)*$'
    # count=1: only the first (i.e. leftmost end-anchored) match is dropped.
    return re.sub(trailing, '', formula, count=1)
|
| 66 |
-
|
| 67 |
-
def find_matching_brace(sequence, start_index, brace=['{', '}']):
    """Return the index of the brace matching the one at ``start_index``.

    Args:
        sequence: list of tokens (or a string of characters).
        start_index: position of the opening brace.
        brace: two-element [opener, closer] pair to match.

    Returns:
        Index of the matching closer, or -1 when no opener was ever seen.

    Raises:
        ValueError: an opener was seen but never closed.
    """
    opener, closer = brace
    depth = 0
    for pos in range(start_index, len(sequence)):
        tok = sequence[pos]
        if tok == opener:
            depth += 1
        elif tok == closer:
            depth -= 1
            if depth == 0:
                return pos
    if depth > 0:
        raise ValueError("Warning! found no matching brace in sequence !")
    return -1
|
| 82 |
-
|
| 83 |
-
def normalize_latex(l, rm_trail=False):
    """Normalize a space-tokenized LaTeX string so every renderable unit is
    exactly one whitespace-separated token.

    NOTE(review): this block was reconstructed from a whitespace-mangled diff;
    statement order follows the source, but the nesting of the table-only
    sections should be confirmed against the original file.

    Args:
        l: space-tokenized LaTeX (formula or tabular) string.
        rm_trail: when True, strip trailing spacing/punctuation commands first.

    Returns:
        The normalized, space-tokenized LaTeX string.
    """
    # Tables get extra treatment below (layout tokens removed, not compacted).
    if "tabular" in l:
        latex_type = "tabular"
    else:
        latex_type = "formula"

    if rm_trail:
        l = remove_trailing_latex(l)
    # \pmatrix / \matrix are replaced with project-local aliases.
    l = l.strip().replace(r'\pmatrix', r'\mypmatrix').replace(r'\matrix', r'\mymatrix')

    # TODO \raggedright \arraybackslash: alignment commands, difficult to handle — remove them.
    for item in ['\\raggedright', '\\arraybackslash']:
        l = l.replace(item, "")

    for item in ['\\lowercase', '\\uppercase']:
        l = l.replace(item, "")

    # TODO \hspace { 1 . 5 cm }: for a formula compact to \hspace{1.5cm}; for a table remove it.
    pattern = r'\\[hv]space { [.0-9a-z ]+ }'
    old_token = re.findall(pattern, l, re.DOTALL)
    if latex_type == "tabular":
        new_token = ["" for item in old_token]
    else:
        new_token = [item.replace(" ", "") for item in old_token]
    for bef, aft in zip(old_token, new_token):
        l = l.replace(bef, aft)

    # TODO take "\begin {tabular} {...}" as one token.
    # TODO \begin{array} appears inside tables too, so the array join must run
    # for both formulas and tables.
    if latex_type == "tabular":
        l = l.replace("\\begin {tabular}", "\\begin{tabular}")
        l = l.replace("\\end {tabular}", "\\end{tabular}")
    l = l.replace("\\begin {array}", "\\begin{array}")
    l = l.replace("\\end {array}", "\\end{array}")
    l_split = l.split(' ')
    idx = 0
    while idx < len(l_split):
        token = l_split[idx]
        if token == "\\begin{tabular}":
            # Merge "\begin{tabular} { c c c }" into a single token.
            sub_idx = idx + 1
            end_idx = find_matching_brace(l_split, sub_idx)
            new_token = "".join(l_split[idx: end_idx+1])
            l_split = l_split[0:idx] + [new_token] + l_split[end_idx+1:]
            break
        idx += 1
    l = ' '.join(l_split)

    # TODO some forms are too complex for re.match, so use brace matching,
    # e.g. \cmidrule ( l { 3 p t } r { 3 p t } ) { 1 - 1 }
    l_split = l.split(' ')
    idx = 0
    while idx < len(l_split):
        token = l_split[idx]
        if token in ["\\cmidrule", "\\cline"]:
            sub_idx = idx + 1
            if l_split[sub_idx] == "(":
                # Optional trimming spec in (...) comes before the {a-b} range.
                mid_end = find_matching_brace(l_split, sub_idx, brace=['(', ')'])
                end_idx = find_matching_brace(l_split, mid_end+1)
            else:
                end_idx = find_matching_brace(l_split, sub_idx)
            new_token = "".join(l_split[idx: end_idx+1])
            l_split = l_split[0:idx] + [new_token] + l_split[end_idx+1:]
        idx += 1
    l = ' '.join(l_split)

    # Compact the column spec of "\begin{array} { l c r }" while keeping the
    # space after \begin{array} itself (via the <s> placeholder).
    pattern = r'\\begin{array} { [lrc ]+ }'
    old_token = re.findall(pattern, l, re.DOTALL)
    new_token = [item.replace("\\begin{array} ", "<s>").replace(" ", "").replace("<s>", "\\begin{array} ") for item in old_token]
    for bef, aft in zip(old_token, new_token):
        l = l.replace(bef, aft)

    # # TODO token such as \not= should be one token
    # pattern = r'\\not [<>+=\-]'
    # old_token = re.findall(pattern, l, re.DOTALL)
    # new_token = [item.replace(" ", "") for item in old_token]
    # for bef, aft in zip(old_token, new_token):
    #     l = l.replace(bef, aft)

    # # TODO \not xx should be combined as one token
    # pattern = r'\\not [\\=\<\>][^ ]+ '
    # old_token = re.findall(pattern, l, re.DOTALL)
    # new_token = [item.replace(" ", "") for item in old_token]
    # for bef, aft in zip(old_token, new_token):
    #     l = l.replace(bef, aft+" ")

    # TODO tokens such as \dots \exp \sinh: split them into parts so the bbox
    # match will be easier.  Pad with spaces so the (?<=\s)/(?=\s) guards also
    # hit the first and last token.
    l = " "+l+" "
    l = re.sub(r'(?<=\s)--(?=\s)', r'- -', l)
    l = re.sub(r'(?<=\s)---(?=\s)', r'- - -', l)
    l = re.sub(r'(?<=\s)…(?=\s)', r'. . .', l)
    l = re.sub(r'(?<=\s)\\ldots(?=\s)', r'. . .', l)
    l = re.sub(r'(?<=\s)\\hdots(?=\s)', r'. . .', l)
    l = re.sub(r'(?<=\s)\\cdots(?=\s)', r'. . .', l)
    l = re.sub(r'(?<=\s)\\dddot(?=\s)', r'. . .', l)
    l = re.sub(r'(?<=\s)\\dots(?=\s)', r'. . .', l)
    l = re.sub(r'(?<=\s)\\dotsc(?=\s)', r'. . .', l)
    l = re.sub(r'(?<=\s)\\dotsi(?=\s)', r'. . .', l)
    l = re.sub(r'(?<=\s)\\dotsm(?=\s)', r'. . .', l)
    l = re.sub(r'(?<=\s)\\dotso(?=\s)', r'. . .', l)
    l = re.sub(r'(?<=\s)\\dotsb(?=\s)', r'. . .', l)
    l = re.sub(r'(?<=\s)\\mathellipsis(?=\s)', r'. . .', l)
    l = re.sub(r'(?<=\s)\\ex(?=\s)', r'\\mathrm { e x }', l)
    l = re.sub(r'(?<=\s)\\ln(?=\s)', r'\\mathrm { l n }', l)
    l = re.sub(r'(?<=\s)\\lg(?=\s)', r'\\mathrm { l g }', l)
    l = re.sub(r'(?<=\s)\\cot(?=\s)', r'\\mathrm { c o t }', l)
    l = re.sub(r'(?<=\s)\\mod(?=\s)', r'\\mathrm { m o d }', l)
    l = re.sub(r'(?<=\s)\\bmod(?=\s)', r'\\mathrm { m o d }', l)
    # \pmod actually differs from \mod, but it is hard to handle; replace with \mod for now.
    l = re.sub(r'(?<=\s)\\pmod(?=\s)', r'\\mathrm { m o d }', l)
    l = re.sub(r'(?<=\s)\\min(?=\s)', r'\\mathrm { m i n }', l)
    l = re.sub(r'(?<=\s)\\max(?=\s)', r'\\mathrm { m a x }', l)
    l = re.sub(r'(?<=\s)\\ker(?=\s)', r'\\mathrm { k e r }', l)
    l = re.sub(r'(?<=\s)\\hom(?=\s)', r'\\mathrm { h o m }', l)
    l = re.sub(r'(?<=\s)\\sec(?=\s)', r'\\mathrm { s e c }', l)
    l = re.sub(r'(?<=\s)\\scs(?=\s)', r'\\mathrm { s c s }', l)
    l = re.sub(r'(?<=\s)\\csc(?=\s)', r'\\mathrm { c s c }', l)
    l = re.sub(r'(?<=\s)\\deg(?=\s)', r'\\mathrm { d e g }', l)
    l = re.sub(r'(?<=\s)\\arg(?=\s)', r'\\mathrm { a r g }', l)
    l = re.sub(r'(?<=\s)\\log(?=\s)', r'\\mathrm { l o g }', l)
    l = re.sub(r'(?<=\s)\\dim(?=\s)', r'\\mathrm { d i m }', l)
    l = re.sub(r'(?<=\s)\\exp(?=\s)', r'\\mathrm { e x p }', l)
    l = re.sub(r'(?<=\s)\\sin(?=\s)', r'\\mathrm { s i n }', l)
    l = re.sub(r'(?<=\s)\\cos(?=\s)', r'\\mathrm { c o s }', l)
    l = re.sub(r'(?<=\s)\\tan(?=\s)', r'\\mathrm { t a n }', l)
    l = re.sub(r'(?<=\s)\\tanh(?=\s)', r'\\mathrm { t a n h }', l)
    l = re.sub(r'(?<=\s)\\cosh(?=\s)', r'\\mathrm { c o s h }', l)
    l = re.sub(r'(?<=\s)\\sinh(?=\s)', r'\\mathrm { s i n h }', l)
    l = re.sub(r'(?<=\s)\\coth(?=\s)', r'\\mathrm { c o t h }', l)
    l = re.sub(r'(?<=\s)\\arcsin(?=\s)', r'\\mathrm { a r c s i n }', l)
    l = re.sub(r'(?<=\s)\\arccos(?=\s)', r'\\mathrm { a r c c o s }', l)
    l = re.sub(r'(?<=\s)\\arctan(?=\s)', r'\\mathrm { a r c t a n }', l)

    # ** token such as "\string xxx" should be one token
    pattern = r'\\string [^ ]+ '
    old_token = re.findall(pattern, l, re.DOTALL)
    new_token = [item.replace(" ", "") for item in old_token]
    for bef, aft in zip(old_token, new_token):
        l = l.replace(bef, aft+" ")

    # ** token such as "\big(" should be one token
    pattern = r'\\[Bb]ig[g]?[glrm]? [(){}|\[\]] '
    old_token = re.findall(pattern, l, re.DOTALL)
    new_token = [item.replace(" ", "") for item in old_token]
    for bef, aft in zip(old_token, new_token):
        l = l.replace(bef, aft+" ")

    # Same for \big followed by a command delimiter (e.g. "\big \langle").
    pattern = r'\\[Bb]ig[g]?[glrm]? \\.*? '
    old_token = re.findall(pattern, l, re.DOTALL)
    new_token = [item.replace(" ", "") for item in old_token]
    for bef, aft in zip(old_token, new_token):
        l = l.replace(bef, aft+" ")

    # TODO when "\operatorname *" meets mathcolor it errors, yet the * is
    # useless, so we simply remove it for now.
    pattern = r'\\operatorname \*'
    old_token = re.findall(pattern, l, re.DOTALL)
    new_token = ["\\operatorname" for item in old_token]
    for bef, aft in zip(old_token, new_token):
        l = l.replace(bef, aft)

    # TODO \lefteqn leads to letter overlap, harmful for rendering — remove it.
    l = l.replace("\\lefteqn", "")

    # TODO \footnote cannot be treated as ONE_Tail_Invisb_Tokens (that type adds
    # color as \mathrm{\color{x}}, whereas \footnote would need
    # \color{\footnote{x}}), so we simply change it to "^".
    l = l.replace("\\footnote ", "^ ")

    # TODO \' cannot be rendered separately (renders differently), so combine
    # e.g. "\' e" -> "\'e"; if "{" follows \' leave them separate.
    pattern = r'\\\' [^{] '
    old_token = re.findall(pattern, l, re.DOTALL)
    new_token = [item.replace(" ", "") for item in old_token]
    for bef, aft in zip(old_token, new_token):
        l = l.replace(bef, aft+" ")

    # TODO "[ -1.5ex ]" "[ 1.5pt ]" "[ 3 mm ]": layout adjustments with no
    # rendering; combine each as one token.
    if latex_type == "tabular":
        pattern = r'\[ [\-.0-9 ]+[exptcm ]+ \]'
        old_token = re.findall(pattern, l, re.DOTALL)
        new_token = [item.replace(" ", "") for item in old_token]
        for bef, aft in zip(old_token, new_token):
            l = l.replace(bef, aft)

    # ** "\parbox { 3cm }" should be combined as one token
    pattern = r'\\parbox {[^{]+}'
    old_token = re.findall(pattern, l, re.DOTALL)
    new_token = [item.replace(" ", "") for item in old_token]
    for bef, aft in zip(old_token, new_token):
        l = l.replace(bef, aft)

    # ** "\raisebox{<lift>}[<height>][<depth>] {" should be one token,
    # e.g. \raisebox{-1.5ex}[0pt]; the trailing "{" stays its own token.
    pattern = r'\\raisebox {[^{]+} [\[\]0-9 exptcm]+{'
    old_token = re.findall(pattern, l, re.DOTALL)
    new_token = [item.replace(" ", "") for item in old_token]
    for bef, aft in zip(old_token, new_token):
        l = l.replace(bef, aft[0:-1]+" {")

    # ** "{ \char... }" should be combined as one token (keep outer braces separate)
    pattern = r'{ \\char[0-9\' ]+}'
    old_token = re.findall(pattern, l, re.DOTALL)
    new_token = [item.replace(" ", "") for item in old_token]
    for bef, aft in zip(old_token, new_token):
        l = l.replace(bef, "{ "+aft[1:-1]+" }")

    # ** \rule{1pt}{2pt} lines: combine as one token and do not render
    pattern = r'\\rule {[ .0-9a-z]+} {[ .0-9a-z]+}'
    old_token = re.findall(pattern, l, re.DOTALL)
    new_token = [item.replace(" ", "") for item in old_token]
    for bef, aft in zip(old_token, new_token):
        l = l.replace(bef, aft)

    # ** \specialrule{1pt}{2pt}{2pt}: special lines, combine as one token
    pattern = r'\\specialrule {[ .0-9a-z]+} {[ .0-9a-z]+} {[ .0-9a-z]+}'
    old_token = re.findall(pattern, l, re.DOTALL)
    new_token = [item.replace(" ", "") for item in old_token]
    for bef, aft in zip(old_token, new_token):
        l = l.replace(bef, aft)

    # ** so our own coloring works, strip pre-existing colors; two shapes occur:
    # \color[rgb]{0, 1, 0} and \color{red} (and the box/cell/text variants).
    pattern = r'\\colorbox[ \[\]RGBrgb]+{ [A-Za-z 0-9,!]+ } |\\color[ \[\]RGBrgb]+{ [A-Za-z 0-9,!]+ } |\\textcolor[ \[\]RGBrgb]+{ [A-Za-z 0-9,!]+ } |\\cellcolor[ \[\]RGBrgb]+{ [A-Za-z 0-9,!]+ } '
    old_token = re.findall(pattern, l, re.DOTALL)
    for bef in old_token:
        l = l.replace(bef, "")

    # ** fill the missing [] and {} according to each token's arity.
    l_split = l.split(' ')
    idx = 0
    while idx < len(l_split):
        token = l_split[idx]
        if token in ONE_Tail_Tokens + ONE_Tail_Invisb_Tokens:
            # ** normalize tokens such as \hat: fill missing {},
            # e.g. \hat \lambda -> \hat {\lambda}; a chain of such tokens is
            # wrapped as nested groups.
            sub_idx = idx + 1
            while sub_idx < len(l_split) and l_split[sub_idx] in ONE_Tail_Tokens+ONE_Tail_Invisb_Tokens:
                sub_idx += 1
            new_split = l_split[0:idx]
            for ii in range(idx, sub_idx):
                new_split = new_split + [l_split[ii], "{"]
            if l_split[sub_idx] != "{":
                new_split = new_split + [l_split[sub_idx]] + ["}"]*(sub_idx-idx)
                l_split = new_split + l_split[sub_idx+1:]
            else:
                end_idx = find_matching_brace(l_split, sub_idx)
                new_split = new_split + l_split[sub_idx+1:end_idx] + ["}"]*(sub_idx-idx)
                l_split = new_split + l_split[end_idx+1:]
        elif token in AB_Tail_Tokens:
            # ** normalize special tokens such as \sqrt: fill the missing [] {}
            # where [] is optional, e.g. \sqrt A B -> \sqrt {A} B and
            # \sqrt [A] B -> \sqrt [A] {B}
            if l_split[idx + 1] != "[" and l_split[idx + 1] != "{":
                l_split = l_split[0:idx+1] + ["{"] + [l_split[idx+1]] + ["}"] + l_split[idx+2:]
            else:
                if l_split[idx + 1] == "[":
                    end1 = find_matching_brace(l_split, idx+1, brace=['[', ']'])
                else:
                    end1 = idx
                if l_split[end1 + 1] != "{":
                    l_split = l_split[0:end1+1] + ["{"] + [l_split[end1+1]] + ["}"] + l_split[end1+2:]
        elif token in TWO_Tail_Tokens + TWO_Tail_Invisb_Tokens:
            # ** normalize special tokens such as \frac: add missing braces,
            # e.g. \frac {\lambda} 2 -> \frac {\lambda} {2}
            if l_split[idx + 1] != "{":
                l_split = l_split[0:idx+1] + ["{"] + [l_split[idx+1]] + ["}"] + l_split[idx+2:]
            end1 = find_matching_brace(l_split, idx+1)
            if l_split[end1 + 1] != "{":
                l_split = l_split[0:end1+1] + ["{"] + [l_split[end1+1]] + ["}"] + l_split[end1+2:]

        idx += 1
    l = ' '.join(l_split)

    return l
|
| 346 |
-
|
| 347 |
-
def token_add_color(l_split, idx, render_dict):
    """Build one render string per visible token: the token at ``idx`` is
    colored black, everything else gray, and the result is stored in
    ``render_dict`` keyed by ``str(idx)``.

    Args:
        l_split: list of space-separated, normalized LaTeX tokens.
        idx: index of the token to highlight.
        render_dict: accumulator mapping str(idx) -> (colored latex, token).

    Returns:
        Tuple ``(l_split, next_idx, render_dict)`` where ``next_idx`` is the
        next index the caller should process.
    """
    token = l_split[idx]
    if token in PHANTOM_Tokens:
        # ** special tokens that do not need render: skip them and their {...} argument
        if l_split[idx + 1] == '{':
            brace_end = find_matching_brace(l_split, idx + 1)
        else:
            brace_end = idx + 1
        next_idx = brace_end + 1
    elif token in TWO_Tail_Tokens:
        # ** tokens such as "\frac A B"; the command itself needs render too.
        num_start = idx + 1
        num_end = find_matching_brace(l_split, num_start)
        den_start = num_end + 1
        den_end = find_matching_brace(l_split, den_start)
        # Black for the command, gray for both arguments.
        l_split_copy = l_split[:idx] + [r'\mathcolor{black}{'+token+'{'] + \
            [r'\mathcolor{gray}{'] + l_split[num_start + 1:num_end] + \
            ['}'] + [r'}{'] + [r'\mathcolor{gray}{'] + l_split[den_start + 1:den_end] + \
            ['}'] + ['}'] + ['}'] + l_split[den_end + 1:]

        l_new = ' '.join(l_split_copy)
        l_new = r'\mathcolor{gray}{ ' + l_new + ' }'
        render_dict[str(idx)] = l_new, token
        next_idx = idx + 1
    elif token in ONE_Tail_Tokens:
        # ** tokens such as "\hat A"; the command itself needs render too.
        num_start = idx + 1
        num_end = find_matching_brace(l_split, num_start)
        l_split_copy = l_split[:idx] + [r'\mathcolor{black}{'] + l_split[idx: num_start+1] + \
            [r'\mathcolor{gray}{'] + l_split[num_start+1: num_end] + \
            ['}'] + l_split[num_end: num_end+1] + ['}'] + l_split[num_end+1:]
        l_new = ' '.join(l_split_copy)
        l_new = r'\mathcolor{gray}{ ' + l_new + ' }'
        render_dict[str(idx)] = l_new, token
        next_idx = idx + 1
    elif token in ONE_Tail_Invisb_Tokens:
        # ** tokens such as "\text A"; the command itself is not rendered.
        num_start = idx + 1
        num_end = find_matching_brace(l_split, num_start)
        sub_idx = num_start+1
        if num_end-num_start == 2:
            # Single-token argument: highlight it directly.
            l_split_copy = l_split.copy()
            l_split_copy[sub_idx] = r'{\mathcolor{black}{' + l_split_copy[sub_idx] + '}}'
            l_new = ' '.join(l_split_copy)
            l_new = r'\mathcolor{gray}{ ' + l_new + ' }'
            render_dict[str(idx)] = l_new, l_split[sub_idx]
            next_idx = num_end
        else:
            # Multi-token argument: recurse over its contents.
            while sub_idx < num_end:
                l_split, sub_idx, render_dict = token_add_color(l_split, sub_idx, render_dict)
            next_idx = num_end + 1
    elif token in AB_Tail_Tokens:
        # ** special tokens like \xrightarrow: either "\xrightarrow {}" or
        # "\xrightarrow [] {}", processed differently.
        if l_split[idx+1] == '{':
            num_start = idx + 1
            num_end = find_matching_brace(l_split, num_start)
            l_split_copy = l_split[:idx] + [r'\mathcolor{black}{'] + l_split[idx: idx+2] \
                + [r'\mathcolor{gray}{'] + l_split[num_start+1: num_end] + ['}}'] + l_split[num_end:]
            l_new = ' '.join(l_split_copy)
            l_new = r'\mathcolor{gray}{ ' + l_new + ' }'
            render_dict[str(idx)] = l_new, token
            sub_idx = num_start+1
            while sub_idx < num_end:
                l_split, sub_idx, render_dict = token_add_color(l_split, sub_idx, render_dict)
            next_idx = num_end + 1
        elif l_split[idx+1] == '[':
            num_start = idx + 1
            num_end = find_matching_brace(l_split, num_start, brace=['[', ']'])
            den_start = num_end + 1
            den_end = find_matching_brace(l_split, den_start)
            l_split_copy = l_split[:idx] + [r'{\mathcolor{black}{'] + l_split[idx: idx+2] \
                + [r'\mathcolor{gray}{'] + l_split[idx+2: num_end] + ['}'] + l_split[num_end:den_start+1] \
                + [r'\mathcolor{gray}{'] + l_split[den_start+1: den_end] + ['}'] + l_split[den_end: den_end+1] \
                + ['}}'] + l_split[den_end+1:]
            l_new = ' '.join(l_split_copy)
            l_new = r'\mathcolor{gray}{ ' + l_new + ' }'
            render_dict[str(idx)] = l_new, token
            # Recurse into both the optional [...] and mandatory {...} contents.
            sub_idx = num_start + 1
            while sub_idx < num_end:
                l_split, sub_idx, render_dict = token_add_color(l_split, sub_idx, render_dict)
            sub_idx = den_start + 1
            while sub_idx < den_end:
                l_split, sub_idx, render_dict = token_add_color(l_split, sub_idx, render_dict)
            next_idx = den_end + 1
    elif token in ["\\multicolumn", "\\multirow"]:
        # ** tokens with three {}: only the text in the third {} is rendered.
        first_start = idx + 1
        first_end = find_matching_brace(l_split, first_start)
        second_start = first_end + 1
        second_end = find_matching_brace(l_split, second_start)
        third_start = second_end + 1
        third_end = find_matching_brace(l_split, third_start)

        sub_idx = third_start+1
        while sub_idx < third_end:
            l_split, sub_idx, render_dict = token_add_color(l_split, sub_idx, render_dict)
        next_idx = third_end + 1
    elif token in SKIP_Tokens+TWO_Tail_Invisb_Tokens or any(re.match(pattern, token) for pattern in SKIP_PATTERNS):
        # ** tokens with no need to render — skip,
        # # print('skip', idx, token)
        # TODO special case: "[" / "]" could be standalone or part of \sqrt[]{};
        # only the standalone kind gets its own render.
        if (token == "[" and l_split[idx-1]!="\\sqrt") or (token == "]" and idx>=3 and l_split[idx-3]!="\\sqrt"):
            l_split_copy = l_split.copy()
            l_split_copy[idx] = r'\mathcolor{black}{ ' + l_split_copy[idx] + ' }'
            l_new = ' '.join(l_split_copy)
            l_new = r'\mathcolor{gray}{ ' + l_new + ' }'
            render_dict[str(idx)] = l_new, token
            next_idx = idx + 1
        else:
            next_idx = idx + 1
    else:
        # ** normal token
        l_split_copy = l_split.copy()
        # TODO sometimes the glyph shifts after adding color; experiments show
        # \mathcolor{black}{ A } behaves better than \mathcolor{black}{A}.
        l_split_copy[idx] = r'\mathcolor{black}{ ' + l_split_copy[idx] + ' }'

        l_new = ' '.join(l_split_copy)
        l_new = r'\mathcolor{gray}{ ' + l_new + ' }'
        render_dict[str(idx)] = l_new, token
        next_idx = idx + 1

    return l_split, next_idx, render_dict
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
def token_add_color_RGB(l_split, idx, token_list, brace_color=False):
    r"""Use \mathcolor[RGB]{r,g,b} to render latex.

    Unlike :func:`token_add_color`, this rewrites ``l_split`` in place with a
    ``<color_N>`` placeholder per visible token (N = position in
    ``token_list``), so a single render pass can color every token at once.

    Args:
        l_split: list of space-separated, normalized LaTeX tokens (mutated).
        idx: index of the token to process.
        token_list: accumulator; token N gets placeholder ``<color_N>``.
        brace_color: wrap the colored token in an extra {} group (used for
            tokens inside an optional [...] argument).

    Returns:
        Tuple ``(l_split, next_idx, token_list)``.
    """
    token = l_split[idx]
    if not token:
        # Empty token (artifact of splitting) — nothing to color.
        next_idx = idx + 1
    elif token in PHANTOM_Tokens:
        # ** special tokens that do not need render: skip them and their {...} argument
        if l_split[idx + 1] == '{':
            brace_end = find_matching_brace(l_split, idx + 1)
        else:
            brace_end = idx + 1
        next_idx = brace_end + 1
    elif token in TWO_Tail_Tokens:
        # ** tokens such as "\frac A B"; the command itself needs render too.
        num_start = idx + 1
        num_end = find_matching_brace(l_split, num_start)
        den_start = num_end + 1
        den_end = find_matching_brace(l_split, den_start)
        color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace("<idx>", str(len(token_list)))
        l_split = l_split[:idx] + [color_token+token] + l_split[idx+1: den_end+1] + ["}"] + l_split[den_end+1:]
        token_list.append(token)
        next_idx = idx + 1
    elif token in ONE_Tail_Tokens:
        # ** tokens such as "\hat A"; the command itself needs render too.
        num_start = idx + 1
        num_end = find_matching_brace(l_split, num_start)
        color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace("<idx>", str(len(token_list)))
        if token != "\\underbrace" and num_end+1 < len(l_split) and l_split[num_end+1] == "_":
            # Extra {} so a following subscript still attaches correctly.
            l_split = l_split[:idx] + ["{"+color_token+token] + l_split[idx+1: num_end+1] + ["}}"] + l_split[num_end+1:]
        else:
            l_split = l_split[:idx] + [color_token+token] + l_split[idx+1: num_end+1] + ["}"] + l_split[num_end+1:]
        token_list.append(token)
        next_idx = idx + 1
    elif token in ONE_Tail_Invisb_Tokens:
        # ** tokens such as "\text A"; the command itself is not rendered.
        num_start = idx + 1
        num_end = find_matching_brace(l_split, num_start)
        sub_idx = num_start+1
        if num_end-num_start == 2:
            # Single-token argument: color it directly.
            color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace("<idx>", str(len(token_list)))
            token_list.append(l_split[num_start+1])
            l_split = l_split[:num_start+1] + [color_token+l_split[num_start+1]+"}"] + l_split[num_end:]
        else:
            # Multi-token argument: recurse over its contents.
            while sub_idx < num_end:
                l_split, sub_idx, token_list = token_add_color_RGB(l_split, sub_idx, token_list)
        next_idx = num_end + 1
    elif token in AB_Tail_Tokens:
        # ** special tokens like \xrightarrow: either "\xrightarrow {}" or
        # "\xrightarrow [] {}", processed differently.
        if l_split[idx+1] == '{':
            num_start = idx + 1
            num_end = find_matching_brace(l_split, num_start)
            color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace("<idx>", str(len(token_list)))
            l_split = l_split[:idx] + [color_token+token] + l_split[idx+1: num_end+1] + ["}"] + l_split[num_end+1:]
            token_list.append(token)
            sub_idx = num_start+1
            while sub_idx < num_end:
                l_split, sub_idx, token_list = token_add_color_RGB(l_split, sub_idx, token_list)
            next_idx = num_end + 1
        elif l_split[idx+1] == '[':
            num_start = idx + 1
            num_end = find_matching_brace(l_split, num_start, brace=['[', ']'])
            den_start = num_end + 1
            den_end = find_matching_brace(l_split, den_start)
            color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace("<idx>", str(len(token_list)))
            l_split = l_split[:idx] + [color_token+token] + l_split[idx+1: den_end+1] + ["}"] + l_split[den_end+1:]
            token_list.append(token)
            # Tokens in the optional [...] need the extra brace wrapper.
            sub_idx = num_start + 1
            while sub_idx < num_end:
                l_split, sub_idx, token_list = token_add_color_RGB(l_split, sub_idx, token_list, brace_color=True)
            sub_idx = den_start + 1
            while sub_idx < den_end:
                l_split, sub_idx, token_list = token_add_color_RGB(l_split, sub_idx, token_list)
            next_idx = den_end + 1
    elif token in ["\\multicolumn", "\\multirow"]:
        # ** tokens with three {}: only the text in the third {} is rendered.
        first_start = idx + 1
        first_end = find_matching_brace(l_split, first_start)
        second_start = first_end + 1
        second_end = find_matching_brace(l_split, second_start)
        third_start = second_end + 1
        third_end = find_matching_brace(l_split, third_start)

        sub_idx = third_start+1
        while sub_idx < third_end:
            l_split, sub_idx, token_list = token_add_color_RGB(l_split, sub_idx, token_list)
        next_idx = third_end + 1
    elif token in SKIP_Tokens+TWO_Tail_Invisb_Tokens or any(re.match(pattern, token) for pattern in SKIP_PATTERNS):
        # ** tokens with no need to render — skip,
        # # print('skip', idx, token)
        # TODO special case: "[" / "]" could be standalone or part of \sqrt[]{};
        # only the standalone kind gets its own color.
        if (token == "[" and l_split[idx-1]!="\\sqrt") or (token == "]" and idx>=3 and l_split[idx-3]!="\\sqrt"):
            color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace("<idx>", str(len(token_list)))
            l_split = l_split[:idx] + [color_token + l_split[idx] + "}"] + l_split[idx+1:]
            token_list.append(token)
            next_idx = idx + 1
        else:
            next_idx = idx + 1
    else:
        # ** normal token
        if brace_color or (idx > 1 and l_split[idx-1] == "_"):
            # Wrap in {} so subscripts / optional-arg contexts stay well-formed.
            color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace("<idx>", str(len(token_list)))
            l_split = l_split[:idx] + ["{" + color_token + l_split[idx] + "}}"] + l_split[idx+1:]
            token_list.append(token)
            next_idx = idx + 1
        else:
            color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace("<idx>", str(len(token_list)))
            l_split = l_split[:idx] + [color_token + l_split[idx] + "}"] + l_split[idx+1:]
            token_list.append(token)
            next_idx = idx + 1
    return l_split, next_idx, token_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|