Spaces:

SoMark
/

FastCDM

Running

FastCDM / fastcdm /latex_processor.py

BinyangQiu

first commit

ae1d809 6 months ago

29.3 kB

	import re

	# 以下列表定义了在后续 token_add_color 系列函数中“跳过”的正则模式。
	# 任何匹配这些模式的 token 都不会被着色，而是保持原样（通常用黑色标记）。
	# 主要用于括号、环境边界、上下标等结构元素。
	SKIP_PATTERNS = [
	r"\{",
	r"\}",
	r"[\[\]]",
	r"\\begin\{.*?\}",
	r"\\end\{.*?\}",
	r"\^",
	r"\_",
	r"\\.rule.",
	r"\\.line.",
	r"\[[\-.0-9]+[epm][xtm]\]",
	]

	# 以下列表中的 LaTeX 命令在后续处理中被视为“透明”或“无意义”的 token。
	# 它们不会触发着色逻辑，直接跳过，避免干扰真正的数学内容。
	SKIP_Tokens = [
	"\\",
	"\\\\",
	"\\index",
	"\\a",
	"&",
	"$",
	"\\multirow",
	"\\def",
	"\\edef",
	"\\raggedright",
	"\\url",
	"\\cr",
	"\\ensuremath",
	"\\left",
	"\\left[",
	"\\left(",
	"\\left{",
	"\\right",
	"\\right]",
	"\\right)",
	"\\right}",
	"\\mathchoice",
	"\\scriptstyle",
	"\\displaystyle",
	"\\qquad",
	"\\quad",
	"\\,",
	"\\!",
	"~",
	"\\boldmath",
	"\\gdef",
	"\\today",
	"\\the",
	]

	# PHANTOM_Tokens 中的命令在后续着色时被视为“幻影”命令：
	# 它们本身不直接参与颜色标记，但其参数仍需递归处理。
	# 主要用于字体、引用、颜色等不影响数学结构的命令。
	PHANTOM_Tokens = [
	"\\fontfamily",
	"\\vphantom",
	"\\phantom",
	"\\rowcolor",
	"\\ref",
	"\\thesubequation",
	"\\global",
	"\\theboldgroup",
	]

	# 以下命令在后续处理中被识别为“双尾”命令：它们需要两个 {} 参数。
	# 例如 \frac{分子}{分母}，在着色时会分别对两个参数进行灰色处理。
	TWO_Tail_Tokens = ["\\frac", "\\binom"]

	# AB_Tail_Tokens 中的命令具有“可选+必选”参数结构：
	# 第一个参数可以是 []，第二个必须是 {}。
	# 例如 \xrightarrow[下方]{上方}，在着色时会分别处理两个参数。
	AB_Tail_Tokens = ["\\xrightarrow", "\\xleftarrow", "\\sqrt"] # special token \xxx [] {}

	# 以下命令也是“双尾”但被视为“不可见”结构，着色逻辑与 TWO_Tail_Tokens 类似，
	# 但通常用于上下堆叠等排版，不影响数学含义。
	TWO_Tail_Invisb_Tokens = ["\\overset", "\\underset", "\\stackrel"]

	# ONE_Tail_Tokens 中的命令只需一个 {} 参数，且会显著改变数学符号外观。
	# 在着色时，命令本身保持黑色，参数内容置灰。
	ONE_Tail_Tokens = [
	"\\widetilde",
	"\\overline",
	"\\hat",
	"\\widehat",
	"\\tilde",
	"\\Tilde",
	"\\dot",
	"\\bar",
	"\\vec",
	"\\underline",
	"\\underbrace",
	"\\check",
	"\\breve",
	"\\Bar",
	"\\Vec",
	"\\mathring",
	"\\ddot",
	"\\Ddot",
	"\\dddot",
	"\\ddddot",
	]

	# ONE_Tail_Invisb_Tokens 中的命令同样只需一个 {} 参数，
	# 但主要用于字体或样式切换，不改变数学含义，因此整体视为“不可见”，
	# 在着色时命令本身不标记，仅对其参数递归处理。
	ONE_Tail_Invisb_Tokens = [
	"\\boldsymbol",
	"\\pmb",
	"\\textbf",
	"\\mathrm",
	"\\mathbf",
	"\\mathsf",
	"\\mathbb",
	"\\mathcal",
	"\\mathinner",
	"\\mathit",
	"\\mathnormal",
	"\\mathring",
	"\\mathscr",
	"\\mathtt",
	"\\textmd",
	"\\texttt",
	"\\textnormal",
	"\\text",
	"\\textit",
	"\\textup",
	"\\mathop",
	"\\mathbin",
	"\\smash",
	"\\operatorname",
	"\\textrm",
	"\\mathfrak",
	"\\emph",
	"\\textsf",
	"\\textsc",
	]


	def flatten_multiline(latex):
	brace_map = {
	"\\left(": "\\right)",
	"\\left[": "\\right]",
	"\\left{": "\\right}",
	}
	l_split = latex.split(" ")
	if l_split[0] == "\\begin{array}":
	if l_split[-1] == "\\end{array}":
	l_split = l_split[2:-1]
	else:
	l_split = l_split[2:]

	idx = 0
	while idx < len(l_split):
	token = l_split[idx]
	if token.startswith("\\left") and token in brace_map.keys():
	end_idx = find_matching_brace(l_split, idx, brace=[token, brace_map[token]])
	if end_idx != -1:
	idx = end_idx
	elif token in ["\\\\", "~", "\\qquad"]:
	l_split = l_split[0:idx] + l_split[idx + 1 :]
	idx -= 1
	idx += 1
	latex = " ".join(l_split)
	return "$ " + latex + " $"


	def clean_latex(text):
	cleaned_text = re.sub(r"(?<=[^\\])\s+(?=[^\\])", "", text)
	for item in [
	"\\hline",
	"\\midrule",
	"\\times",
	"\\bf",
	"\\footnotesize",
	"\\cr",
	"\\log",
	]:
	cleaned_text = cleaned_text.replace(item, item + " ")
	cleaned_text = cleaned_text.replace(" \\mathcolor{black}", "\\mathcolor{black}")
	return cleaned_text


	def remove_trailing_latex(formula):
	pattern = r"(\\(hspace\?\{[^{}]?\}\|vspace\?\{[^{}]?\}\|smallskip\|medskip\|quad\|qquad\|bigskip\|[;,])\|\~\|\.)*$"
	cleaned_formula = re.sub(pattern, "", formula, count=1)
	return cleaned_formula


	def find_matching_brace(sequence, start_index, brace=["{", "}"]):
	left_brace, right_brace = brace
	depth = 0
	for i, char in enumerate(sequence[start_index:], start=start_index):
	if char == left_brace:
	depth += 1
	elif char == right_brace:
	depth -= 1
	if depth == 0:
	return i
	if depth > 0:
	error_info = "Warning! found no matching brace in sequence !"
	raise ValueError(error_info)
	return -1


	def normalize_latex(l, rm_trail=False):
	if "tabular" in l:
	latex_type = "tabular"
	else:
	latex_type = "formula"

	if rm_trail:
	l = remove_trailing_latex(l)
	l = l.strip().replace(r"\pmatrix", r"\mypmatrix").replace(r"\matrix", r"\mymatrix")

	for item in ["\\raggedright", "\\arraybackslash"]:
	l = l.replace(item, "")

	for item in ["\\lowercase", "\\uppercase"]:
	l = l.replace(item, "")

	pattern = r"\\[hv]space { [.0-9a-z ]+ }"
	old_token = re.findall(pattern, l, re.DOTALL)
	if latex_type == "tabular":
	new_token = ["" for item in old_token]
	else:
	new_token = [item.replace(" ", "") for item in old_token]
	for bef, aft in zip(old_token, new_token):
	l = l.replace(bef, aft)

	if latex_type == "tabular":
	l = l.replace("\\begin {tabular}", "\\begin{tabular}")
	l = l.replace("\\end {tabular}", "\\end{tabular}")
	l = l.replace("\\begin {array}", "\\begin{array}")
	l = l.replace("\\end {array}", "\\end{array}")
	l_split = l.split(" ")
	idx = 0
	while idx < len(l_split):
	token = l_split[idx]
	if token == "\\begin{tabular}":
	sub_idx = idx + 1
	end_idx = find_matching_brace(l_split, sub_idx)
	new_token = "".join(l_split[idx : end_idx + 1])
	l_split = l_split[0:idx] + [new_token] + l_split[end_idx + 1 :]
	break
	idx += 1
	l = " ".join(l_split)
	l_split = l.split(" ")
	idx = 0
	while idx < len(l_split):
	token = l_split[idx]
	if token in ["\\cmidrule", "\\cline"]:
	sub_idx = idx + 1
	if l_split[sub_idx] == "(":
	mid_end = find_matching_brace(l_split, sub_idx, brace=["(", ")"])
	end_idx = find_matching_brace(l_split, mid_end + 1)
	else:
	end_idx = find_matching_brace(l_split, sub_idx)
	new_token = "".join(l_split[idx : end_idx + 1])
	l_split = l_split[0:idx] + [new_token] + l_split[end_idx + 1 :]
	idx += 1
	l = " ".join(l_split)

	pattern = r"\\begin{array} { [lrc ]+ }"
	old_token = re.findall(pattern, l, re.DOTALL)
	new_token = [
	item.replace("\\begin{array} ", "<s>")
	.replace(" ", "")
	.replace("<s>", "\\begin{array} ")
	for item in old_token
	]
	for bef, aft in zip(old_token, new_token):
	l = l.replace(bef, aft)

	l = " " + l + " "
	l = re.sub(r"(?<=\s)--(?=\s)", r"- -", l)
	l = re.sub(r"(?<=\s)---(?=\s)", r"- - -", l)
	l = re.sub(r"(?<=\s)…(?=\s)", r". . .", l)
	l = re.sub(r"(?<=\s)\\ldots(?=\s)", r". . .", l)
	l = re.sub(r"(?<=\s)\\hdots(?=\s)", r". . .", l)
	l = re.sub(r"(?<=\s)\\cdots(?=\s)", r". . .", l)
	l = re.sub(r"(?<=\s)\\dddot(?=\s)", r". . .", l)
	l = re.sub(r"(?<=\s)\\dots(?=\s)", r". . .", l)
	l = re.sub(r"(?<=\s)\\dotsc(?=\s)", r". . .", l)
	l = re.sub(r"(?<=\s)\\dotsi(?=\s)", r". . .", l)
	l = re.sub(r"(?<=\s)\\dotsm(?=\s)", r". . .", l)
	l = re.sub(r"(?<=\s)\\dotso(?=\s)", r". . .", l)
	l = re.sub(r"(?<=\s)\\dotsb(?=\s)", r". . .", l)
	l = re.sub(r"(?<=\s)\\mathellipsis(?=\s)", r". . .", l)
	l = re.sub(r"(?<=\s)\\ex(?=\s)", r"\\mathrm { e x }", l)
	l = re.sub(r"(?<=\s)\\ln(?=\s)", r"\\mathrm { l n }", l)
	l = re.sub(r"(?<=\s)\\lg(?=\s)", r"\\mathrm { l g }", l)
	l = re.sub(r"(?<=\s)\\cot(?=\s)", r"\\mathrm { c o t }", l)
	l = re.sub(r"(?<=\s)\\mod(?=\s)", r"\\mathrm { m o d }", l)
	l = re.sub(r"(?<=\s)\\bmod(?=\s)", r"\\mathrm { m o d }", l)
	l = re.sub(r"(?<=\s)\\pmod(?=\s)", r"\\mathrm { m o d }", l)
	l = re.sub(r"(?<=\s)\\min(?=\s)", r"\\mathrm { m i n }", l)
	l = re.sub(r"(?<=\s)\\max(?=\s)", r"\\mathrm { m a x }", l)
	l = re.sub(r"(?<=\s)\\ker(?=\s)", r"\\mathrm { k e r }", l)
	l = re.sub(r"(?<=\s)\\hom(?=\s)", r"\\mathrm { h o m }", l)
	l = re.sub(r"(?<=\s)\\sec(?=\s)", r"\\mathrm { s e c }", l)
	l = re.sub(r"(?<=\s)\\scs(?=\s)", r"\\mathrm { s c s }", l)
	l = re.sub(r"(?<=\s)\\csc(?=\s)", r"\\mathrm { c s c }", l)
	l = re.sub(r"(?<=\s)\\deg(?=\s)", r"\\mathrm { d e g }", l)
	l = re.sub(r"(?<=\s)\\arg(?=\s)", r"\\mathrm { a r g }", l)
	l = re.sub(r"(?<=\s)\\log(?=\s)", r"\\mathrm { l o g }", l)
	l = re.sub(r"(?<=\s)\\dim(?=\s)", r"\\mathrm { d i m }", l)
	l = re.sub(r"(?<=\s)\\exp(?=\s)", r"\\mathrm { e x p }", l)
	l = re.sub(r"(?<=\s)\\sin(?=\s)", r"\\mathrm { s i n }", l)
	l = re.sub(r"(?<=\s)\\cos(?=\s)", r"\\mathrm { c o s }", l)
	l = re.sub(r"(?<=\s)\\tan(?=\s)", r"\\mathrm { t a n }", l)
	l = re.sub(r"(?<=\s)\\tanh(?=\s)", r"\\mathrm { t a n h }", l)
	l = re.sub(r"(?<=\s)\\cosh(?=\s)", r"\\mathrm { c o s h }", l)
	l = re.sub(r"(?<=\s)\\sinh(?=\s)", r"\\mathrm { s i n h }", l)
	l = re.sub(r"(?<=\s)\\coth(?=\s)", r"\\mathrm { c o t h }", l)
	l = re.sub(r"(?<=\s)\\arcsin(?=\s)", r"\\mathrm { a r c s i n }", l)
	l = re.sub(r"(?<=\s)\\arccos(?=\s)", r"\\mathrm { a r c c o s }", l)
	l = re.sub(r"(?<=\s)\\arctan(?=\s)", r"\\mathrm { a r c t a n }", l)
	l = re.sub(r"(?<=\s)\\bf([a-zA-Z])", r"\\mathbf{\1}", l)

	pattern = r"\\string [^ ]+ "
	old_token = re.findall(pattern, l, re.DOTALL)
	new_token = [item.replace(" ", "") for item in old_token]
	for bef, aft in zip(old_token, new_token):
	l = l.replace(bef, aft + " ")

	pattern = r"\\[Bb]ig[g]?[glrm]? [(){}\|\[\]] "
	old_token = re.findall(pattern, l, re.DOTALL)
	new_token = [item.replace(" ", "") for item in old_token]
	for bef, aft in zip(old_token, new_token):
	l = l.replace(bef, aft + " ")

	pattern = r"\\[Bb]ig[g]?[glrm]? \\.*? "
	old_token = re.findall(pattern, l, re.DOTALL)
	new_token = [item.replace(" ", "") for item in old_token]
	for bef, aft in zip(old_token, new_token):
	l = l.replace(bef, aft + " ")

	pattern = r"\\operatorname \*"
	old_token = re.findall(pattern, l, re.DOTALL)
	new_token = ["\\operatorname" for item in old_token]
	for bef, aft in zip(old_token, new_token):
	l = l.replace(bef, aft)

	l = l.replace("\\lefteqn", "")

	l = l.replace("\\footnote ", "^ ")

	pattern = r"\\\' [^{] "
	old_token = re.findall(pattern, l, re.DOTALL)
	new_token = [item.replace(" ", "") for item in old_token]
	for bef, aft in zip(old_token, new_token):
	l = l.replace(bef, aft + " ")

	if latex_type == "tabular":
	pattern = r"\[ [\-.0-9 ]+[exptcm ]+ \]"
	old_token = re.findall(pattern, l, re.DOTALL)
	new_token = [item.replace(" ", "") for item in old_token]
	for bef, aft in zip(old_token, new_token):
	l = l.replace(bef, aft)

	pattern = r"\\parbox {[^{]+}"
	old_token = re.findall(pattern, l, re.DOTALL)
	new_token = [item.replace(" ", "") for item in old_token]
	for bef, aft in zip(old_token, new_token):
	l = l.replace(bef, aft)

	pattern = r"\\raisebox {[^{]+} [\[\]0-9 exptcm]+{"
	old_token = re.findall(pattern, l, re.DOTALL)
	new_token = [item.replace(" ", "") for item in old_token]
	for bef, aft in zip(old_token, new_token):
	l = l.replace(bef, aft[0:-1] + " {")

	pattern = r"{ \\char[0-9\' ]+}"
	old_token = re.findall(pattern, l, re.DOTALL)
	new_token = [item.replace(" ", "") for item in old_token]
	for bef, aft in zip(old_token, new_token):
	l = l.replace(bef, "{ " + aft[1:-1] + " }")

	pattern = r"\\rule {[ .0-9a-z]+} {[ .0-9a-z]+}"
	old_token = re.findall(pattern, l, re.DOTALL)
	new_token = [item.replace(" ", "") for item in old_token]
	for bef, aft in zip(old_token, new_token):
	l = l.replace(bef, aft)

	pattern = r"\\specialrule {[ .0-9a-z]+} {[ .0-9a-z]+} {[ .0-9a-z]+}"
	old_token = re.findall(pattern, l, re.DOTALL)
	new_token = [item.replace(" ", "") for item in old_token]
	for bef, aft in zip(old_token, new_token):
	l = l.replace(bef, aft)

	pattern = r"\\colorbox[ \[\]RGBrgb]+{ [A-Za-z 0-9,!]+ } \|\\color[ \[\]RGBrgb]+{ [A-Za-z 0-9,!]+ } \|\\textcolor[ \[\]RGBrgb]+{ [A-Za-z 0-9,!]+ } \|\\cellcolor[ \[\]RGBrgb]+{ [A-Za-z 0-9,!]+ } "
	old_token = re.findall(pattern, l, re.DOTALL)
	for bef in old_token:
	l = l.replace(bef, "")

	l_split = l.split(" ")
	idx = 0
	while idx < len(l_split):
	token = l_split[idx]
	if token in ONE_Tail_Tokens + ONE_Tail_Invisb_Tokens:
	sub_idx = idx + 1
	while (
	sub_idx < len(l_split)
	and l_split[sub_idx] in ONE_Tail_Tokens + ONE_Tail_Invisb_Tokens
	):
	sub_idx += 1
	new_split = l_split[0:idx]
	for ii in range(idx, sub_idx):
	new_split = new_split + [l_split[ii], "{"]
	if l_split[sub_idx] != "{":
	new_split = new_split + [l_split[sub_idx]] + ["}"] * (sub_idx - idx)
	l_split = new_split + l_split[sub_idx + 1 :]
	else:
	end_idx = find_matching_brace(l_split, sub_idx)
	new_split = (
	new_split + l_split[sub_idx + 1 : end_idx] + ["}"] * (sub_idx - idx)
	)
	l_split = new_split + l_split[end_idx + 1 :]
	elif token in AB_Tail_Tokens:
	if l_split[idx + 1] != "[" and l_split[idx + 1] != "{":
	l_split = (
	l_split[0 : idx + 1]
	+ ["{"]
	+ [l_split[idx + 1]]
	+ ["}"]
	+ l_split[idx + 2 :]
	)
	else:
	if l_split[idx + 1] == "[":
	end1 = find_matching_brace(l_split, idx + 1, brace=["[", "]"])
	else:
	end1 = idx
	if l_split[end1 + 1] != "{":
	l_split = (
	l_split[0 : end1 + 1]
	+ ["{"]
	+ [l_split[end1 + 1]]
	+ ["}"]
	+ l_split[end1 + 2 :]
	)
	elif token in TWO_Tail_Tokens + TWO_Tail_Invisb_Tokens:
	if l_split[idx + 1] != "{":
	l_split = (
	l_split[0 : idx + 1]
	+ ["{"]
	+ [l_split[idx + 1]]
	+ ["}"]
	+ l_split[idx + 2 :]
	)
	end1 = find_matching_brace(l_split, idx + 1)
	if l_split[end1 + 1] != "{":
	l_split = (
	l_split[0 : end1 + 1]
	+ ["{"]
	+ [l_split[end1 + 1]]
	+ ["}"]
	+ l_split[end1 + 2 :]
	)

	idx += 1
	l = " ".join(l_split)

	return l


	def token_add_color(l_split, idx, render_dict):
	token = l_split[idx]
	if token in PHANTOM_Tokens:
	if l_split[idx + 1] == "{":
	brace_end = find_matching_brace(l_split, idx + 1)
	else:
	brace_end = idx + 1
	next_idx = brace_end + 1
	elif token in TWO_Tail_Tokens:
	num_start = idx + 1
	num_end = find_matching_brace(l_split, num_start)
	den_start = num_end + 1
	den_end = find_matching_brace(l_split, den_start)
	l_split_copy = (
	l_split[:idx]
	+ [r"\mathcolor{black}{" + token + "{"]
	+ [r"\mathcolor{gray}{"]
	+ l_split[num_start + 1 : num_end]
	+ ["}"]
	+ [r"}{"]
	+ [r"\mathcolor{gray}{"]
	+ l_split[den_start + 1 : den_end]
	+ ["}"]
	+ ["}"]
	+ ["}"]
	+ l_split[den_end + 1 :]
	)

	l_new = " ".join(l_split_copy)
	l_new = r"\mathcolor{gray}{ " + l_new + " }"
	render_dict[str(idx)] = l_new, token
	next_idx = idx + 1
	elif token in ONE_Tail_Tokens:
	num_start = idx + 1
	num_end = find_matching_brace(l_split, num_start)
	l_split_copy = (
	l_split[:idx]
	+ [r"\mathcolor{black}{"]
	+ l_split[idx : num_start + 1]
	+ [r"\mathcolor{gray}{"]
	+ l_split[num_start + 1 : num_end]
	+ ["}"]
	+ l_split[num_end : num_end + 1]
	+ ["}"]
	+ l_split[num_end + 1 :]
	)
	l_new = " ".join(l_split_copy)
	l_new = r"\mathcolor{gray}{ " + l_new + " }"
	render_dict[str(idx)] = l_new, token
	next_idx = idx + 1
	elif token in ONE_Tail_Invisb_Tokens:
	num_start = idx + 1
	num_end = find_matching_brace(l_split, num_start)
	sub_idx = num_start + 1
	if num_end - num_start == 2:
	l_split_copy = l_split.copy()
	l_split_copy[sub_idx] = (
	r"{\mathcolor{black}{" + l_split_copy[sub_idx] + "}}"
	)
	l_new = " ".join(l_split_copy)
	l_new = r"\mathcolor{gray}{ " + l_new + " }"
	render_dict[str(idx)] = l_new, l_split[sub_idx]
	next_idx = num_end
	else:
	while sub_idx < num_end:
	l_split, sub_idx, render_dict = token_add_color(
	l_split, sub_idx, render_dict
	)
	next_idx = num_end + 1
	elif token in AB_Tail_Tokens:
	if l_split[idx + 1] == "{":
	num_start = idx + 1
	num_end = find_matching_brace(l_split, num_start)
	l_split_copy = (
	l_split[:idx]
	+ [r"\mathcolor{black}{"]
	+ l_split[idx : idx + 2]
	+ [r"\mathcolor{gray}{"]
	+ l_split[num_start + 1 : num_end]
	+ ["}}"]
	+ l_split[num_end:]
	)
	l_new = " ".join(l_split_copy)
	l_new = r"\mathcolor{gray}{ " + l_new + " }"
	render_dict[str(idx)] = l_new, token
	sub_idx = num_start + 1
	while sub_idx < num_end:
	l_split, sub_idx, render_dict = token_add_color(
	l_split, sub_idx, render_dict
	)
	next_idx = num_end + 1
	elif l_split[idx + 1] == "[":
	num_start = idx + 1
	num_end = find_matching_brace(l_split, num_start, brace=["[", "]"])
	den_start = num_end + 1
	den_end = find_matching_brace(l_split, den_start)
	l_split_copy = (
	l_split[:idx]
	+ [r"{\mathcolor{black}{"]
	+ l_split[idx : idx + 2]
	+ [r"\mathcolor{gray}{"]
	+ l_split[idx + 2 : num_end]
	+ ["}"]
	+ l_split[num_end : den_start + 1]
	+ [r"\mathcolor{gray}{"]
	+ l_split[den_start + 1 : den_end]
	+ ["}"]
	+ l_split[den_end : den_end + 1]
	+ ["}}"]
	+ l_split[den_end + 1 :]
	)
	l_new = " ".join(l_split_copy)
	l_new = r"\mathcolor{gray}{ " + l_new + " }"
	render_dict[str(idx)] = l_new, token
	sub_idx = num_start + 1
	while sub_idx < num_end:
	l_split, sub_idx, render_dict = token_add_color(
	l_split, sub_idx, render_dict
	)
	sub_idx = den_start + 1
	while sub_idx < den_end:
	l_split, sub_idx, render_dict = token_add_color(
	l_split, sub_idx, render_dict
	)
	next_idx = den_end + 1
	elif token in ["\\multicolumn", "\\multirow"]:
	first_start = idx + 1
	first_end = find_matching_brace(l_split, first_start)
	second_start = first_end + 1
	second_end = find_matching_brace(l_split, second_start)
	third_start = second_end + 1
	third_end = find_matching_brace(l_split, third_start)

	sub_idx = third_start + 1
	while sub_idx < third_end:
	l_split, sub_idx, render_dict = token_add_color(
	l_split, sub_idx, render_dict
	)
	next_idx = third_end + 1
	elif token in SKIP_Tokens + TWO_Tail_Invisb_Tokens or any(
	re.match(pattern, token) for pattern in SKIP_PATTERNS
	):
	if (token == "[" and l_split[idx - 1] != "\\sqrt") or (
	token == "]" and idx >= 3 and l_split[idx - 3] != "\\sqrt"
	):
	l_split_copy = l_split.copy()
	l_split_copy[idx] = r"\mathcolor{black}{ " + l_split_copy[idx] + " }"
	l_new = " ".join(l_split_copy)
	l_new = r"\mathcolor{gray}{ " + l_new + " }"
	render_dict[str(idx)] = l_new, token
	next_idx = idx + 1
	else:
	next_idx = idx + 1
	else:
	l_split_copy = l_split.copy()
	l_split_copy[idx] = r"\mathcolor{black}{ " + l_split_copy[idx] + " }"

	l_new = " ".join(l_split_copy)
	l_new = r"\mathcolor{gray}{ " + l_new + " }"
	render_dict[str(idx)] = l_new, token
	next_idx = idx + 1

	return l_split, next_idx, render_dict


	def token_add_color_RGB(l_split, idx, token_list, brace_color=False):
	"""using \mathcolor[RGB]{r,g,b} to render latex."""
	token = l_split[idx]
	if not token:
	next_idx = idx + 1
	elif token in PHANTOM_Tokens:
	if l_split[idx + 1] == "{":
	brace_end = find_matching_brace(l_split, idx + 1)
	else:
	brace_end = idx + 1
	next_idx = brace_end + 1
	elif token in TWO_Tail_Tokens:
	num_start = idx + 1
	num_end = find_matching_brace(l_split, num_start)
	den_start = num_end + 1
	den_end = find_matching_brace(l_split, den_start)
	color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace(
	"<idx>", str(len(token_list))
	)
	l_split = (
	l_split[:idx]
	+ [color_token + token]
	+ l_split[idx + 1 : den_end + 1]
	+ ["}"]
	+ l_split[den_end + 1 :]
	)
	token_list.append(token)
	next_idx = idx + 1
	elif token in ONE_Tail_Tokens:
	num_start = idx + 1
	num_end = find_matching_brace(l_split, num_start)
	color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace(
	"<idx>", str(len(token_list))
	)
	if (
	token != "\\underbrace"
	and num_end + 1 < len(l_split)
	and l_split[num_end + 1] == "_"
	):
	l_split = (
	l_split[:idx]
	+ ["{" + color_token + token]
	+ l_split[idx + 1 : num_end + 1]
	+ ["}}"]
	+ l_split[num_end + 1 :]
	)
	else:
	l_split = (
	l_split[:idx]
	+ [color_token + token]
	+ l_split[idx + 1 : num_end + 1]
	+ ["}"]
	+ l_split[num_end + 1 :]
	)
	token_list.append(token)
	next_idx = idx + 1
	elif token in ONE_Tail_Invisb_Tokens:
	num_start = idx + 1
	num_end = find_matching_brace(l_split, num_start)
	sub_idx = num_start + 1
	if num_end - num_start == 2:
	color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace(
	"<idx>", str(len(token_list))
	)
	token_list.append(l_split[num_start + 1])
	l_split = (
	l_split[: num_start + 1]
	+ [color_token + l_split[num_start + 1] + "}"]
	+ l_split[num_end:]
	)
	else:
	while sub_idx < num_end:
	l_split, sub_idx, token_list = token_add_color_RGB(
	l_split, sub_idx, token_list
	)
	next_idx = num_end + 1
	elif token in AB_Tail_Tokens:
	if l_split[idx + 1] == "{":
	num_start = idx + 1
	num_end = find_matching_brace(l_split, num_start)
	color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace(
	"<idx>", str(len(token_list))
	)
	l_split = (
	l_split[:idx]
	+ [color_token + token]
	+ l_split[idx + 1 : num_end + 1]
	+ ["}"]
	+ l_split[num_end + 1 :]
	)
	token_list.append(token)
	sub_idx = num_start + 1
	while sub_idx < num_end:
	l_split, sub_idx, token_list = token_add_color_RGB(
	l_split, sub_idx, token_list
	)
	next_idx = num_end + 1
	elif l_split[idx + 1] == "[":
	num_start = idx + 1
	num_end = find_matching_brace(l_split, num_start, brace=["[", "]"])
	den_start = num_end + 1
	den_end = find_matching_brace(l_split, den_start)
	color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace(
	"<idx>", str(len(token_list))
	)
	l_split = (
	l_split[:idx]
	+ [color_token + token]
	+ l_split[idx + 1 : den_end + 1]
	+ ["}"]
	+ l_split[den_end + 1 :]
	)
	token_list.append(token)
	sub_idx = num_start + 1
	while sub_idx < num_end:
	l_split, sub_idx, token_list = token_add_color_RGB(
	l_split, sub_idx, token_list, brace_color=True
	)
	sub_idx = den_start + 1
	while sub_idx < den_end:
	l_split, sub_idx, token_list = token_add_color_RGB(
	l_split, sub_idx, token_list
	)
	next_idx = den_end + 1
	elif token in ["\\multicolumn", "\\multirow"]:
	first_start = idx + 1
	first_end = find_matching_brace(l_split, first_start)
	second_start = first_end + 1
	second_end = find_matching_brace(l_split, second_start)
	third_start = second_end + 1
	third_end = find_matching_brace(l_split, third_start)

	sub_idx = third_start + 1
	while sub_idx < third_end:
	l_split, sub_idx, token_list = token_add_color_RGB(
	l_split, sub_idx, token_list
	)
	next_idx = third_end + 1
	elif token in SKIP_Tokens + TWO_Tail_Invisb_Tokens or any(
	re.match(pattern, token) for pattern in SKIP_PATTERNS
	):

	if (token == "[" and l_split[idx - 1] != "\\sqrt") or (
	token == "]" and idx >= 3 and l_split[idx - 3] != "\\sqrt"
	):
	color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace(
	"<idx>", str(len(token_list))
	)
	l_split = (
	l_split[:idx] + [color_token + l_split[idx] + "}"] + l_split[idx + 1 :]
	)
	token_list.append(token)
	next_idx = idx + 1
	else:
	next_idx = idx + 1
	else:

	if brace_color or (idx > 1 and l_split[idx - 1] == "_"):
	color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace(
	"<idx>", str(len(token_list))
	)
	l_split = (
	l_split[:idx]
	+ ["{" + color_token + l_split[idx] + "}}"]
	+ l_split[idx + 1 :]
	)
	token_list.append(token)
	next_idx = idx + 1
	else:
	color_token = "\\mathcolor[RGB]{<color_<idx>>}{".replace(
	"<idx>", str(len(token_list))
	)
	l_split = (
	l_split[:idx] + [color_token + l_split[idx] + "}"] + l_split[idx + 1 :]
	)
	token_list.append(token)
	next_idx = idx + 1
	return l_split, next_idx, token_list