Andreas99 committed on
Commit
8dd388d
·
verified ·
1 Parent(s): a3bcde9

Delete utils

Browse files
utils/de-macro.py DELETED
@@ -1,1110 +0,0 @@
1
- #!/usr/bin/python -O
2
-
3
- r"""
4
- Copyright 2005-2020 Peter Gacs
5
- Licensed under the Academic Free Licence version 2.1
6
-
7
- DE-MACRO
8
-
9
- Version 1.4.1 - A small typo corrected.
10
-
11
- Version 1.4 - Luca Citi made it python2.7 and python3 compatible.
12
- Peter Gacs improved the parsing of \input{<filename>},
13
- and made @ a letter in the style files.
14
- Version 1.3 - this version is much more conservative about deleting
15
- comments and inserting or deleting blank space: tries to
16
- leave in all comments, adds space only when necessary, and
17
- tries not to delete space in the main text.
18
- The motivating comments came from Daniel Webb.
19
- Version 1.2 - a syntactical bug corrected, thanks Brian de Alwis!
20
-
21
-
22
- PURPOSE
23
-
24
- This program can eliminate most private macros from a LaTeX file.
25
- Applications:
26
- - your publisher has difficulty dealing with many private macros
27
- - you cooperate with colleagues who do not understand your macros
28
- - preprocessing before a system like latex2html, which is somewhat
29
- unpredictable with private macros.
30
-
31
- It cannot be used to eliminate more complex macros that rely on
32
- more programming-like constructs in style files. In particular, it will
33
- not replace style files that have options.
34
-
35
- USAGE
36
-
37
- de-macro [--defs <defs-db>] <tex-file-1>[.tex] [<tex-file-2>[.tex] ...]
38
-
39
- Simplest example: de-macro testament
40
-
41
- (As you see, the <> is used only in the notation of this documentation,
42
- you should not type it.)
43
-
44
- If <tex-file-i> contains a command \usepackage{<defs-file>-private}
45
- then the file <defs-file>-private.sty will be read, and its macros will be
46
- replaced in <tex-file-i> with their definitions.
47
- The result is in <tex-file-i>-clean.tex.
48
-
49
- Only newcommand, renewcommand, newenvironment, and renewenvironment are
50
- understood (it does not matter, whether you write new or renew).
51
- These can be nested but do not be too clever, since I do not
52
- guarantee the same expansion order as in TeX.
53
-
54
- FILES
55
-
56
- <tex-file-1>.db
57
- <tex-file>-clean.tex
58
- <defs-file>-private.sty
59
-
60
- For speed, a macro database file called <defs-file>.db is created.
61
- If such a file exists already then it is used.
62
- If <defs-file>-private.sty is older than <tex-file-1>.db then it will not
63
- be used.
64
-
65
- It is possible to specify another database filename via --defs <defs-db>.
66
- Then <defs-db>.db will be used.
67
-
68
- For each <tex-file-i>, a file <tex-file-i>-clean.tex will be produced.
69
- If <tex-file-i>-clean.tex is newer than <tex-file-i>.tex then it stays.
70
-
71
- INPUT COMMAND
72
-
73
- If a tex file contains a command \input{<tex-file-j>} or \input <tex-file-j>
74
- then <tex-file-j>.tex is processed recursively, and <tex-file-j>-clean.tex
75
- will be inserted into the final output.
76
- For speed, if <tex-file-j>-clean.tex is newer than <tex-file-j>.tex
77
- then <tex-file-j>.tex will not be reprocessed.
78
-
79
- The dependency checking is not sophisticated, so if you rewrite some macros
80
- then remove all *-clean.tex files!
81
-
82
- """
83
-
84
- import sys, os, re, shelve
85
-
86
- # Utilities
87
-
88
- class No_detail:
89
- strerror = ""
90
-
91
- no_detail = No_detail()
92
-
93
-
94
- class Error(Exception):
95
- """Base class for exceptions in this module."""
96
- pass
97
-
98
- class Empty_text_error(Error):
99
- """Exception raised for errors in the input.
100
-
101
- Attributes:
102
- data -- data that was found empty
103
- message
104
- """
105
-
106
- def __init__(self, data, message):
107
- self.data = data
108
- self.message = message
109
-
110
- def warn(error_message, detail = no_detail):
111
- sys.stderr.write(error_message + "\n")
112
- if no_detail != detail:
113
- sys.stderr.write(detail.strerror + "\n")
114
-
115
- def die(error_message, detail = no_detail):
116
- warn(error_message, detail = no_detail)
117
- sys.exit(1)
118
-
119
- def getopt_map(one_letter_opts, long_optlist):
120
- "Turns long options into an option map, using getopt."
121
- import getopt
122
- optlist, args = getopt.getopt(sys.argv[1:],
123
- one_letter_opts, long_optlist)
124
- opt_map = {}
125
- for pair in optlist: opt_map[pair[0]] = pair[1] or 1
126
- return opt_map, args
127
-
128
- def newer(file1, file2):
129
-
130
- if not os.path.isfile(file1):
131
- return False
132
-
133
- try:
134
- stat_return = os.lstat(file1)
135
- except OSError as detail:
136
- die("lstat " + file1 + " failed:", detail)
137
- time1 = stat_return.st_mtime
138
-
139
- try:
140
- stat_return = os.lstat(file2)
141
- except OSError as detail:
142
- die("lstat " + file2 + " failed:", detail)
143
- time2 = stat_return.st_mtime
144
-
145
- return time1 > time2
146
-
147
- def cut_extension(filename, ext):
148
- """
149
- If filename has extension ext (including the possible dot),
150
- it will be cut off.
151
- """
152
- file = filename
153
- index = filename.rfind(ext)
154
- if 0 <= index and len(file)-len(ext) == index:
155
- file = file[:index]
156
- return file
157
-
158
-
159
- class Stream:
160
- data = None
161
- pos = None
162
- item = None
163
-
164
- def legal(self):
165
- return 0 <= self.pos and self.pos < len(self.data)
166
-
167
- def uplegal(self):
168
- return self.pos < len(self.data)
169
-
170
- def __init__(self, data_v = None):
171
- self.data = data_v
172
- if self.data:
173
- self.pos = 0
174
- self.item = self.data[self.pos]
175
-
176
- def next(self):
177
- self.pos += 1
178
- if self.pos < len(self.data):
179
- self.item = self.data[self.pos]
180
- return self.item
181
-
182
- def reset(self):
183
- if self.data and 0 < len(self.data):
184
- self.pos = 0
185
- self.item = self.data[0]
186
- return self.item
187
-
188
-
189
- # Basic classes
190
-
191
- blank_re = re.compile(r"\s")
192
- blanked_filename_re = re.compile(r"^\s+(\w*)\s+")
193
- braced_filename_re = re.compile(r"^\s*{\s*(\w*)\s*}")
194
- blank_or_rbrace_re = re.compile(r"[\s}]")
195
- pos_digit_re = re.compile(r"[1-9]")
196
-
197
- def isletter(c, isatletter=False):
198
- if "@" == c:
199
- return isatletter
200
- else:
201
- return c.isalpha()
202
-
203
- class Token:
204
- """Type 0 means ordinary character, types 1,2 mean escape sequence
205
- (without the \ ), type 3 means comment.
206
- """
207
- simple_ty = 0
208
- esc_symb_ty = 1
209
- esc_str_ty = 2
210
- comment_ty = 3
211
-
212
- type = simple_ty
213
- val = " "
214
-
215
- def __init__(self, type_v=simple_ty, val_v=" "):
216
- self.type = type_v
217
- self.val = val_v
218
-
219
- def show(self):
220
- out = ""
221
- if simple_ty == self.type or comment_ty == self.type:
222
- out = self.val
223
- else:
224
- out = "\\" + self.val
225
- return out
226
-
227
-
228
- # Constants
229
-
230
- g_token = Token(0," ") # generic token
231
- simple_ty = g_token.simple_ty
232
- comment_ty = g_token.comment_ty
233
- esc_symb_ty = g_token.esc_symb_ty
234
- esc_str_ty = g_token.esc_str_ty
235
-
236
-
237
- def detokenize(text, isatletter=False):
238
- """
239
- Input is a list of tokens.
240
- Output is a string.
241
- """
242
- out = ""
243
- if 0 == len(text):
244
- return
245
- pos = 0
246
- out += text[pos].show()
247
- pos += 1
248
- while pos < len(text):
249
- previtem = text[pos-1]
250
- item = text[pos]
251
- """Insert a separating space after an escape sequence if it is a
252
- string and is followed by a letter."""
253
- if (esc_str_ty == previtem.type
254
- and simple_ty == item.type and isletter(item.val[0], isatletter)):
255
- out += " "
256
- out += item.show()
257
- pos += 1
258
- return out
259
-
260
-
261
- def strip_comments(text):
262
- """
263
- Input is a list of tokens.
264
- Output is the same list except the comment tokens.
265
- """
266
- out = []
267
- for token in text:
268
- if not comment_ty == token.type:
269
- out.append(token)
270
- return out
271
-
272
- class Group:
273
- """type 0 means a token, type 1 means contents of a group within {}
274
- """
275
- token_ty = 0
276
- group_ty = 1
277
- type = token_ty
278
- val = [] # Value is a token list.
279
-
280
- def __init__(self, type_v, val_v):
281
- self.type = type_v
282
- self.val = val_v
283
-
284
- def show(self):
285
- if token_ty == self.type:
286
- return self.val.show()
287
- else:
288
- return "{%s}" % detokenize(self.val)
289
-
290
- # Constants
291
-
292
- g_group = Group(0, [])
293
- token_ty = g_group.token_ty
294
- group_ty = g_group.group_ty
295
-
296
-
297
- def tokenize(in_str, isatletter=False):
298
- """Returns a list of tokens.
299
- """
300
- text = []
301
- cs = Char_stream(in_str)
302
- cs.reset()
303
- if not cs.legal():
304
- raise Error("No string to tokenize.")
305
- while cs.uplegal():
306
- if "%" == cs.item:
307
- comment = cs.scan_comment_token()
308
- text.append(Token(comment_ty, comment))
309
- elif "\\" != cs.item:
310
- text.append(Token(simple_ty, cs.item))
311
- cs.next()
312
- else:
313
- cs.next()
314
- name = cs.scan_escape_token(isatletter)
315
- if isletter(name[0], isatletter):
316
- token = Token(esc_str_ty, name)
317
- else:
318
- token = Token(esc_symb_ty, name)
319
- text.append(token)
320
- if "makeatletter" == name:
321
- isatletter=True
322
- elif "makeatother" == name:
323
- isatletter=False
324
- return text
325
-
326
-
327
- class Command_def:
328
- name = "1"
329
- numargs = 0
330
- body= ""
331
-
332
- def __init__(self, name_v, numargs_v, body_v):
333
- self.name = name_v
334
- self.numargs = numargs_v
335
- self.body = body_v
336
-
337
- def show(self):
338
- out = "\\newcommand{\\%s}" % (self.name)
339
- if 0 < self.numargs:
340
- out += "[%d]" % self.numargs
341
- out += "{%s}" % detokenize(self.body)
342
- return out
343
-
344
-
345
- class Env_def:
346
- name = "1"
347
- numargs = 0
348
- begin = ""
349
- end = ""
350
-
351
- def __init__(self, name_v, numargs_v, begin_v, end_v):
352
- self.name = name_v
353
- self.numargs = numargs_v
354
- self.begin = begin_v
355
- self.end = end_v
356
-
357
- def show(self):
358
- out = "\\newenvironment{%s}" % self.name
359
- if 0 < self.numargs:
360
- out += "[%d]" % self.numargs
361
- out += "{%s}" % detokenize(self.begin)
362
- out += "{%s}" % detokenize(self.end)
363
- return out
364
-
365
-
366
- class Command_instance:
367
- name = "1"
368
- args = []
369
-
370
- def __init__(self, name_v, args_v):
371
- self.name = name_v
372
- self.args = args_v
373
-
374
- def show(self):
375
- out = "\\"+self.name
376
- for arg in self.args:
377
- out += "{%s}" % detokenize(arg)
378
- return out
379
-
380
-
381
- class Env_instance:
382
- name = "1"
383
- args = []
384
-
385
- def __init__(self, name_v, args_v, body_v):
386
- self.name = name_v
387
- self.args = args_v
388
- self.body = body_v
389
-
390
- def show(self):
391
- out = "\\begin{%s}" % self.name
392
- for arg in self.args:
393
- out += "{%s}" % detokenize(arg)
394
- out += detokenize(self.body)
395
- out += "\\end{%s}" % self.name
396
- return out
397
-
398
- class Char_stream(Stream):
399
-
400
- def scan_escape_token(self, isatletter=False):
401
- """
402
- Starts after the escape sign, assumes that it is scanning a symbol.
403
- Returns a token-string.
404
- """
405
- out = self.item # Continue only if this is a letter.
406
- item = self.next()
407
- if isletter(out, isatletter):
408
- while self.uplegal() and isletter(item, isatletter):
409
- out += item
410
- item = self.next()
411
- return out
412
-
413
- def scan_comment_token(self):
414
- """
415
- Starts at the comment sign %, assumes that it is scanning a comment.
416
- Returns the whole comment string,
417
- including the % and all empty space after it.
418
- """
419
- comment = ""
420
- while self.uplegal() and "\n" != self.item:
421
- comment += self.item
422
- self.next()
423
- while self.uplegal() and blank_re.match(self.item):
424
- comment += self.item
425
- self.next()
426
- return comment
427
-
428
- def scan_input_filename(self):
429
- """We have just read an \input token. The next group or word will be
430
- interpreted as a filename (possibly without .tex). Filenames should not begin with spaces.
431
- Return the filename.
432
- """
433
- item = self.item
434
- file = ""
435
- while self.uplegal() and blank_re.match(self.item):
436
- item = self.next()
437
- if "{" == item:
438
- item = self.next()
439
- while self.uplegal() and not "}" == item:
440
- file += item
441
- item = self.next()
442
- self.next()
443
- else:
444
- while self.uplegal() and not blank_re.match(item):
445
- file += item
446
- item = self.next()
447
- return file
448
-
449
- def scan_package_filenames(self):
450
- r"""We just read a \usepackage token. The next group will be
451
- interpreted as a list of filenames (without .sty) separated by commas.
452
- Return the list.
453
- """
454
- item = self.item
455
- while self.uplegal() and blank_re.match(item):
456
- item = self.next()
457
- file = ""
458
- if not "{" == item:
459
- raise Error("\\usepackage not followed by brace.")
460
- item = self.next()
461
- while self.uplegal() and not blank_or_rbrace_re.match(item):
462
- file += item
463
- item = self.next()
464
- self.next()
465
- return file.split(",")
466
-
467
-
468
- class Tex_stream(Stream):
469
-
470
- defs = ({}, {})
471
- defs_db = "x"
472
- defs_db_file = "x.db"
473
- debug = False
474
-
475
- def smart_tokenize(self, in_str, handle_inputs=False, isatletter=False):
476
- """Returns a list of tokens.
477
- It may interpret and carry out all \input commands.
478
- """
479
- self.data = []
480
- text = self.data
481
- cs = Char_stream(in_str)
482
- cs.reset()
483
- if not cs.legal():
484
- raise Error("No string to tokenize.")
485
- while cs.uplegal():
486
- if "%" == cs.item:
487
- comment = cs.scan_comment_token()
488
- text.append(Token(comment_ty, comment))
489
- elif "\\" != cs.item:
490
- text.append(Token(simple_ty, cs.item))
491
- cs.next()
492
- else:
493
- cs.next()
494
- name = cs.scan_escape_token(isatletter)
495
- if "input" == name and handle_inputs:
496
- file = cs.scan_input_filename()
497
- to_add = self.process_if_newer(file)
498
- text.extend(to_add)
499
- elif "usepackage" == name:
500
- while cs.uplegal() and blank_re.match(cs.item):
501
- cs.next()
502
- if "[" == cs.item: # Packages with options will not be processed.
503
- text.extend([Token(esc_str_ty, "usepackage"),
504
- Token(simple_ty, "[")])
505
- cs.next()
506
- continue
507
- files = cs.scan_package_filenames()
508
- i = 0
509
- while i < len(files): # process private packages
510
- file = files[i]
511
- p = file.rfind("-private")
512
- if p < 0 or not len(file) - len("-private") == p:
513
- i += 1
514
- continue
515
- defs_db_file = file+".db"
516
- self.add_defs(file)
517
- del files[i:(i+1)]
518
- if files: # non-private packages left
519
- group_content = ",".join(files)
520
- to_add_str = "\\usepackage{%s}" % (group_content)
521
- to_add = tokenize(to_add_str,isatletter)
522
- text.extend(to_add)
523
- else:
524
- if isletter(name[0], isatletter):
525
- token = Token(esc_str_ty, name)
526
- else:
527
- token = Token(esc_symb_ty, name)
528
- text.append(token)
529
- if "makeatletter" == name:
530
- isatletter=True
531
- elif "makeatother" == name:
532
- isatletter=False
533
- self.reset()
534
- return self.data
535
-
536
- def smart_detokenize(self,isatletter=False):
537
- """
538
- Output is a string.
539
- If the list contains an \input{file} then the content of file
540
- file-clean.tex replaces it in the output.
541
- """
542
- self.reset()
543
- if not self.legal():
544
- return ""
545
- out = ""
546
- previtem = None
547
- while self.uplegal():
548
- item = self.item
549
- """Insert a separating space after an escape sequence if it is a
550
- string and is followed by a letter."""
551
- if (None != previtem and esc_str_ty == previtem.type
552
- and simple_ty == item.type and isletter(item.val[0], isatletter)):
553
- out += " "
554
- previtem = item
555
- if not (esc_str_ty == item.type and "input" == item.val):
556
- out += item.show()
557
- self.next()
558
- else:
559
- self.next()
560
- group = self.scan_group()
561
- file = detokenize(group.val)
562
- clean_file = "%s-clean.tex" % (file)
563
- print("Reading file %s" % (clean_file))
564
- fp = open(clean_file,"r")
565
- content = fp.read()
566
- fp.close()
567
- out += content
568
- return out
569
-
570
- # Basic tex scanning
571
-
572
- def skip_blank_tokens(self): # we also skip comment tokens.
573
- item = self.item
574
- while (self.uplegal() and
575
- (comment_ty == item.type or
576
- (simple_ty == item.type and blank_re.match(item.val)))):
577
- item = self.next()
578
- return item
579
-
580
- def scan_group(self):
581
- """Returns group.
582
- """
583
- if not self.legal():
584
- raise Error("No group to scan.")
585
- item = self.item
586
- if not (simple_ty == item.type and "{" == item.val):
587
- return Group(token_ty, [self.item])
588
- count = 1
589
- group = []
590
- item = self.next()
591
- while count and self.uplegal():
592
- if simple_ty == item.type:
593
- if "{" == item.val:
594
- count += 1
595
- elif "}" == item.val:
596
- count -= 1
597
- if count != 0:
598
- group.append(item)
599
- item = self.next()
600
- return Group(group_ty, group)
601
-
602
- # Command and environment definitions
603
-
604
- def scan_command_name(self):
605
- """Returns name.
606
- """
607
- if not self.legal():
608
- raise Error("No command name to scan.")
609
- item = self.item
610
- name = ""
611
- if item.type in [esc_symb_ty, esc_str_ty]:
612
- name = item.val
613
- else:
614
- if not "{" == item.val:
615
- raise Error("Command definition misses first {.")
616
- self.next()
617
- item = self.skip_blank_tokens()
618
- if not item.type in [esc_symb_ty, esc_str_ty]:
619
- raise Error("Command definition does not begin with control sequence.")
620
- name = item.val
621
- self.next()
622
- item = self.skip_blank_tokens()
623
- if not "}" == item.val:
624
- raise Error("Definition for commmand %s misses first }., %s" %
625
- (name, item.val))
626
- self.next()
627
- self.skip_blank_tokens()
628
- return name
629
-
630
- def scan_numargs(self, name):
631
- """
632
- name is the name of the command or environment definition being
633
- scanned.
634
- Starts on a nonblank token.
635
- Returns numargs
636
- where numargs is the number of arguments in a command or environment
637
- definition,
638
- """
639
- if not self.legal():
640
- raise Error("No numargs to scan.")
641
- item = self.item
642
- numargs = 0
643
- if not simple_ty == item.type:
644
- raise Error("Illegal command or environment definition: "+name)
645
- if "[" == item.val:
646
- if not 4 < len(self.data):
647
- raise Error("Command or environment definition is illegal: "+name)
648
- item = self.next()
649
- if not simple_ty == item.type:
650
- raise Error("Illegal command or environment definition: "+name)
651
- numargs = item.val
652
- if not pos_digit_re.match(numargs):
653
- raise Error("%s must be argument number after %s" % (numargs, name))
654
- numargs = int(numargs)
655
- self.next()
656
- item = self.skip_blank_tokens()
657
- if not simple_ty == item.type:
658
- raise Error("Illegal command definition: "+name)
659
- if "]" != item.val:
660
- raise Error("Illegal command definition: "+name)
661
- self.next()
662
- self.skip_blank_tokens()
663
- return numargs
664
-
665
- def scan_command_def(self):
666
- """Scan a command definition.
667
- Return command_def.
668
- Assumes that the number of arguments is at most 9.
669
- """
670
- if not self.legal():
671
- raise Error("No command definition to scan.")
672
- item = self.item
673
- if not 2 < len(self.data):
674
- raise Error("Command definition is illegal.")
675
- # newcommand or renewcommand
676
- if not item.type in [esc_symb_ty, esc_str_ty]:
677
- raise Error("Command definition should begin with control sequence: "+item.val)
678
- if item.val not in ["newcommand", "renewcommand"]:
679
- raise Error("Command definition should begin with control sequence.")
680
- self.next()
681
- self.skip_blank_tokens()
682
-
683
- cmd_name = self.scan_command_name()
684
- numargs = self.scan_numargs(cmd_name)
685
-
686
- body_group = self.scan_group()
687
- if group_ty != body_group.type:
688
- raise Error("Command body missing: "+cmd_name)
689
- body_val = strip_comments(body_group.val)
690
- return Command_def(cmd_name, numargs, body_val)
691
-
692
- def scan_env_name(self):
693
- """Starts on a {.
694
- Returns name.
695
- """
696
- if not self.legal():
697
- raise Error("No environment name to scan.")
698
- item = self.item
699
- if not "{" == item.val:
700
- raise Error("Env. definition begins with %s, not with {" % (item.val))
701
- self.next()
702
- item = self.skip_blank_tokens()
703
- name = ""
704
- if not simple_ty == item.type:
705
- raise Error("1. Env. def. begins with cont. seq. %s, not with env.name."
706
- % (item.val))
707
- while self.uplegal() and not blank_or_rbrace_re.match(item.val):
708
- name += item.val
709
- item = self.next()
710
- if not simple_ty == item.type:
711
- raise Error("2. Env. def. begins with cont. seq. %s, not with env.name."
712
- % (item.val))
713
- item = self.skip_blank_tokens()
714
- if not "}" == item.val:
715
- raise Error("Command definition does not begin with control sequence.")
716
- self.next()
717
- self.skip_blank_tokens()
718
- return name
719
-
720
- def scan_env_def(self):
721
- """Scan an environment definition.
722
- Return env_def
723
- Assumes that the number of arguments is at most 9.
724
- """
725
- if not self.legal():
726
- raise Error("No environment definition to scan.")
727
- item = self.item
728
- if not 7 < len(self.data):
729
- raise Error("Environment definition is illegal.")
730
- pos = 0
731
-
732
- if not item.type in [esc_symb_ty, esc_str_ty]:
733
- raise Error("Env. definition does not begin with control sequence:"+
734
- item.val)
735
- if item.val not in ["newenvironment", "renewenvironment"]:
736
- raise Error("Env. definition does not begin with control sequence.")
737
- self.next()
738
- self.skip_blank_tokens()
739
-
740
- env_name = self.scan_env_name()
741
- numargs = self.scan_numargs(env_name)
742
- self.skip_blank_tokens()
743
-
744
- begin_group = self.scan_group()
745
- if group_ty != begin_group.type:
746
- raise Error("Begin body missing: "+env_name)
747
- begin_val = strip_comments(begin_group.val)
748
-
749
- self.skip_blank_tokens()
750
-
751
- end_group = self.scan_group()
752
- if group_ty != end_group.type:
753
- raise Error("End body missing:"+env_name)
754
- end_val = strip_comments(end_group.val)
755
-
756
- return Env_def(env_name, numargs, begin_val, end_val)
757
-
758
- def scan_defs(self):
759
- if not self.legal():
760
- raise Error("No definitions to scan.")
761
- self.reset()
762
- command_defs, env_defs = self.defs
763
- while self.uplegal():
764
- if (esc_str_ty == self.item.type
765
- and self.item.val in ["newcommand", "renewcommand"]):
766
- def_start_pos = self.pos
767
- command_def = self.scan_command_def()
768
- command_defs[command_def.name] = command_def
769
- def_end_pos = self.pos
770
- for del_pos in range(def_start_pos,def_end_pos):
771
- del self.data[def_start_pos]
772
- self.pos = def_start_pos
773
- self.item = self.data[self.pos]
774
- elif (esc_str_ty == self.item.type and self.item.val
775
- in ["newenvironment", "renewenvironment"]):
776
- def_start_pos = self.pos
777
- env_def = self.scan_env_def()
778
- env_defs[env_def.name] = env_def
779
- def_end_pos = self.pos
780
- for del_pos in range(def_start_pos,def_end_pos):
781
- del self.data[def_start_pos]
782
- self.pos = def_start_pos
783
- self.item = self.data[self.pos]
784
- else:
785
- self.next()
786
-
787
- # Instances
788
-
789
- def scan_args(self, command_or_env_def):
790
- """Scan the arguments of a command or environment.
791
- Return [args].
792
- """
793
- if not self.legal():
794
- raise Error("No arguments to scan.")
795
- numargs = command_or_env_def.numargs
796
- name = command_or_env_def.name
797
-
798
- args = []
799
- for i in range(numargs):
800
- arg = []
801
- if not (simple_ty == self.item.type and "{" == self.item.val):
802
- arg = [self.item]
803
- self.next()
804
- else:
805
- group = self.scan_group()
806
- arg = group.val
807
- args.append(arg)
808
- return args
809
-
810
- def scan_command(self, command_def):
811
- """Scan the arguments of a command.
812
- Return command_instance
813
- """
814
- if not self.legal():
815
- raise Error("No command to scan.")
816
- if not self.item.type in [esc_symb_ty, esc_str_ty]:
817
- raise Error("Command does not begin with control sequence.")
818
- name = self.item.val
819
- self.next()
820
- if 0 < command_def.numargs:
821
- self.skip_blank_tokens()
822
- args = self.scan_args(command_def)
823
- else:
824
- args = []
825
- return Command_instance(name, args)
826
-
827
- def test_env_boundary(self, item):
828
- """Check whether an environment begin or end follows.
829
- Return 1 if \begin, -1 if \end, 0 otherwise.
830
- """
831
- d = 0
832
- if esc_str_ty == item.type:
833
- if "begin"==item.val:
834
- d = 1
835
- elif "end"==item.val:
836
- d = -1
837
- return d
838
-
839
- def scan_env_begin(self):
840
- """Scan an environment name.
841
- Return env_name.
842
- """
843
- if not self.legal():
844
- raise Error("No environment begin to scan.")
845
- item = self.item
846
- if not (esc_str_ty == item.type and "begin" == item.val):
847
- raise Error("Environment does not begin with begin.")
848
- self.next()
849
- name_group = self.scan_group()
850
- name = detokenize(name_group.val)
851
- return name
852
-
853
- def scan_env_end(self):
854
- """Scan an environment end.
855
- Return env_name.
856
- """
857
- if not self.legal():
858
- raise Error("No environment end to scan.")
859
- item = self.item
860
- if not (esc_str_ty == item.type and "end" == item.val):
861
- raise Error("Environment does not end with end.")
862
- self.next()
863
- name_group = self.scan_group()
864
- name = detokenize(name_group.val)
865
- return name
866
-
867
- def scan_env_rest(self, env_def):
868
- """Scanning starts after \begin{envname}.
869
- Returns env_instance.
870
- """
871
- if not self.legal():
872
- raise Error("No environment rest to scan.")
873
- count = 1 # We are already within a boundary.
874
- args = self.scan_args(env_def)
875
- body = []
876
- while count and self.uplegal():
877
- old_pos = self.pos
878
- d = self.test_env_boundary(self.item)
879
- count += d
880
- if 1 == d:
881
- self.scan_env_begin()
882
- elif -1 == d:
883
- self.scan_env_end()
884
- else:
885
- self.next()
886
- if 0 < count:
887
- body.extend(self.data[old_pos : self.pos])
888
- return Env_instance(env_def.name, args, body)
889
-
890
- # Definitions
891
-
892
- def restore_defs(self):
893
- if os.path.isfile(self.defs_db_file):
894
- print("Using defs db %s" % (self.defs_db_file))
895
- db_h = shelve.open(self.defs_db)
896
- self.defs = db_h["defs"]
897
- db_h.close()
898
-
899
- def save_defs(self):
900
- db_h = shelve.open(self.defs_db)
901
- if "defs" in db_h:
902
- del db_h["defs"]
903
- db_h["defs"] = self.defs
904
- db_h.close()
905
-
906
- def add_defs(self, defs_file):
907
- defs_file_compl = defs_file + ".sty"
908
- if not os.path.isfile(defs_file_compl):
909
- raise Error("%s does not exist" % (defs_file_compl))
910
-
911
- defs_db_file = self.defs_db_file
912
- if newer(defs_db_file, defs_file_compl):
913
- print("Using defs db %s for %s" % (defs_db_file, defs_file))
914
- else:
915
- defs_fp = open(defs_file_compl, "r")
916
- defs_str = defs_fp.read()
917
- defs_fp.close()
918
- ds = Tex_stream()
919
- ds.defs = self.defs
920
- defs_text = ds.smart_tokenize(defs_str,isatletter=True)
921
- # changing ds.defs will change self.defs
922
- if self.debug:
923
- defs_seen_file = "%s-seen.sty" % (defs_file)
924
- defs_seen_fp = open(defs_seen_file, "w")
925
- out = detokenize(defs_text,isatletter=True)
926
- defs_seen_fp.write(out)
927
- defs_seen_fp.close()
928
- ds.scan_defs()
929
- if self.debug:
930
- out = ""
931
- command_defs, env_defs = self.defs
932
- for def_name in command_defs.keys():
933
- out += command_defs[def_name].show() + "\n"
934
- for def_name in env_defs.keys():
935
- out += env_defs[def_name].show() +"\n"
936
- print("Definitions after reading %s:" % (defs_file))
937
- print(out)
938
-
939
- # Applying definitions, recursively
940
- # (maybe not quite in Knuth order, so avoid tricks!)
941
-
942
- def subst_args(self, body, args):
943
- out = []
944
- pos = 0
945
- while pos < len(body):
946
- item = body[pos]
947
- if not (simple_ty == item.type and "#" == item.val):
948
- out.append(item)
949
- pos += 1
950
- continue
951
- pos += 1
952
- token = body[pos]
953
- argnum = token.val
954
- if not pos_digit_re.match(argnum):
955
- raise Error("# is not followed by number.")
956
- argnum = int(argnum)
957
- if argnum > len(args):
958
- raise Error("Too large argument number.")
959
- arg = args[argnum-1]
960
- out += arg
961
- pos += 1
962
- return out
963
-
964
- def apply_command_recur(self, command_instance):
965
- command_defs, env_defs = self.defs
966
- name = command_instance.name
967
- command_def = command_defs[name]
968
-
969
- args = command_instance.args
970
- body = command_def.body
971
- result = self.subst_args(body, args)
972
- try:
973
- result = self.apply_all_recur(result)
974
- except Empty_text_error as e:
975
- raise Error("apply_all_recur fails on command instance %s: %s, %s" % \
976
- (command_instance.show(), detokenize(e.data), e.message))
977
- return result
978
-
979
- def apply_env_recur(self, env_instance):
980
- command_defs, env_defs = self.defs
981
- name = env_instance.name
982
- env_def = env_defs[name]
983
-
984
- begin, end = env_def.begin, env_def.end
985
- body, args = env_instance.body, env_instance.args
986
- out = self.subst_args(begin, args) + body + self.subst_args(end, args)
987
- return self.apply_all_recur(out)
988
-
989
-
990
- def apply_all_recur(self, data, report=False):
991
- ts = Tex_stream(data)
992
- ts.defs = self.defs
993
- command_defs, env_defs = self.defs
994
- out = []
995
- progress_step = 10000
996
- progress = progress_step
997
- if not ts.legal():
998
- raise Empty_text_error(data, "No text to process.")
999
- while ts.uplegal():
1000
- if self.pos > progress:
1001
- if report:
1002
- print(self.pos)
1003
- progress += progress_step
1004
- if not ts.item.type in [esc_symb_ty, esc_str_ty]:
1005
- out.append(ts.item)
1006
- ts.next()
1007
- continue
1008
- if 1 == ts.test_env_boundary(ts.item):
1009
- old_pos = ts.pos
1010
- env_name = ts.scan_env_begin()
1011
- if env_name not in env_defs:
1012
- out.extend(ts.data[old_pos : ts.pos])
1013
- continue
1014
- else:
1015
- env_def = env_defs[env_name]
1016
- env_instance = ts.scan_env_rest(env_def)
1017
- result = ts.apply_env_recur(env_instance)
1018
- out.extend(result)
1019
- elif ts.item.val not in command_defs:
1020
- out.append(ts.item)
1021
- ts.next()
1022
- continue
1023
- else:
1024
- command_def = command_defs[ts.item.val]
1025
- command_inst = ts.scan_command(command_def)
1026
- result = ts.apply_command_recur(command_inst)
1027
- out.extend(result)
1028
- return out
1029
-
1030
-
1031
- # Processing files
1032
-
1033
- def process_file(self, file):
1034
- """Returns the new defs.
1035
- """
1036
- file = cut_extension(file, ".tex")
1037
- source_file = "%s.tex" % (file)
1038
- print("File %s [" % (source_file))
1039
- source_fp = open(source_file, "r")
1040
- text_str = source_fp.read()
1041
- source_fp.close()
1042
-
1043
- self.smart_tokenize(text_str, handle_inputs=True)
1044
- if not self.data:
1045
- raise Error("Empty tokenization result.")
1046
- self.reset()
1047
-
1048
- if self.debug:
1049
- source_seen_fname = "%s-seen.tex" % (file)
1050
- source_seen_fp = open(source_seen_fname, "w")
1051
- source_seen_fp.write(detokenize(self.data))
1052
- source_seen_fp.close()
1053
- self.scan_defs()
1054
- self.data = self.apply_all_recur(self.data, report=True)
1055
- result_fname = "%s-clean.tex" % (file)
1056
- print("Writing %s [" % (result_fname))
1057
- result_fp = open(result_fname, "w")
1058
- result_fp.write(self.smart_detokenize())
1059
- result_fp.close()
1060
- print("] file %s" % (result_fname))
1061
- print("] file %s" % (source_file))
1062
-
1063
- def process_if_newer(self, file):
1064
- """
1065
- \input{file} is added to the token list.
1066
- If the input file is newer it is processed.
1067
- Returns tokenized \input{file}.
1068
- """
1069
- file = cut_extension(file, ".tex")
1070
- tex_file = file+".tex"
1071
- clean_tex_file = file+"-clean.tex"
1072
- if newer(clean_tex_file, tex_file):
1073
- print("Using %s." % (clean_tex_file))
1074
- else:
1075
- ts = Tex_stream()
1076
- ts.data = []
1077
- ts.defs = self.defs
1078
- ts.process_file(file)
1079
- to_add = "\\input{%s}" % (file)
1080
- return tokenize(to_add)
1081
-
1082
- # Main
1083
-
1084
- long_optlist = ["debug","defs="]
1085
- options, restargs = getopt_map("x", long_optlist)
1086
-
1087
- debug = False
1088
- if "--debug" in options:
1089
- debug = True
1090
-
1091
- root = restargs[0]
1092
- root = cut_extension(root, ".tex")
1093
- if "--defs" in options:
1094
- defs_root = options["--defs"]
1095
- else:
1096
- defs_root = "%s" % (root)
1097
- defs_db = defs_root
1098
- defs_db_file = defs_root+".db"
1099
-
1100
- ts = Tex_stream()
1101
- ts.defs_db = defs_db
1102
- ts.defs_db_file = defs_db_file
1103
- ts.debug = debug
1104
-
1105
- ts.restore_defs()
1106
- for root in restargs:
1107
- ts.process_file(root)
1108
-
1109
- print("(Re)creating defs db %s" % (defs_db))
1110
- ts.save_defs()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/def_handle.py DELETED
@@ -1,75 +0,0 @@
1
- import argparse
2
- import re
3
-
4
-
5
- def main():
6
- args = parse_command_line()
7
- data = read(args.input)
8
- data = convert(data)
9
- write(args.output, data)
10
-
11
-
12
- def parse_command_line():
13
- parser = argparse.ArgumentParser(
14
- description='Replace \\def with \\newcommand where possible.',
15
- )
16
- parser.add_argument(
17
- 'input',
18
- help='TeX input file with \\def',
19
- )
20
- parser.add_argument(
21
- '--output',
22
- '-o',
23
- required=True,
24
- help='TeX output file with \\newcommand',
25
- )
26
-
27
- return parser.parse_args()
28
-
29
- def read(path):
30
- with open(path, mode='rb') as handle:
31
- return handle.read()
32
-
33
-
34
- def convert(data):
35
- return re.sub(
36
- rb'((?:\\(?:expandafter|global|long|outer|protected)'
37
- rb'(?: +|\r?\n *)?)*)?'
38
- rb'\\def *(\\[a-zA-Z]+) *(?:#+([0-9]))*\{',
39
- replace,
40
- data,
41
- )
42
-
43
-
44
- def replace(match):
45
- prefix = match.group(1)
46
- if (
47
- prefix is not None and
48
- (
49
- b'expandafter' in prefix or
50
- b'global' in prefix or
51
- b'outer' in prefix or
52
- b'protected' in prefix
53
- )
54
- ):
55
- pass #return match.group(0)
56
-
57
- result = rb'\newcommand'
58
-
59
- result += b'{' + match.group(2) + b'}'
60
- if match.lastindex == 3:
61
- result += b'[' + match.group(3) + b']'
62
-
63
- result += b'{'
64
- return result
65
-
66
-
67
- def write(path, data):
68
- with open(path, mode='wb') as handle:
69
- handle.write(data)
70
-
71
- print('=> File written: {0}'.format(path))
72
-
73
-
74
- if __name__ == '__main__':
75
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/gradio_utils.py DELETED
@@ -1,20 +0,0 @@
1
- from transformers import StoppingCriteria
2
- import sys
3
-
4
-
5
- # Handle termination signal
6
- def signal_handler(sig, frame):
7
- print("\nTermination signal received. Shutting down Gradio interface.")
8
- sys.exit(0)
9
-
10
- # Custom stopping criteria
11
- class StopOnTokens(StoppingCriteria):
12
- def __call__(self, input_ids, scores, **kwargs):
13
- stop_ids = [29, 0] # Define specific stop token IDs
14
- return input_ids[0][-1] in stop_ids
15
-
16
- # Toggle task selection
17
- def toggle_selection(current_task, new_task):
18
- """Toggle task selection: deselect if clicked again, otherwise update selection."""
19
- updated_task = "" if current_task == new_task else new_task
20
- return updated_task
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/graph_utils.py DELETED
@@ -1,111 +0,0 @@
1
- import regex
2
- import re
3
-
4
- def retrieve_text_cite(text, command):
5
- base_pattern = (
6
- r'\\' + command + r"(?:\[(?:.*?)\])*\{((?:[^{}]+|\{(?1)\})*)\}(?:\[(?:.*?)\])*"
7
- )
8
-
9
- def extract_text_inside_curly_braces(text):
10
- pattern = r"\{((?:[^{}]|(?R))*)\}"
11
-
12
- match = regex.search(pattern, text)
13
-
14
- if match:
15
- return match.group(1)
16
- else:
17
- return ""
18
-
19
- found_texts = []
20
- for match in regex.finditer(base_pattern, text):
21
- temp_substring = text[match.span()[0] : match.span()[1]]
22
- found_texts.append(extract_text_inside_curly_braces(temp_substring))
23
-
24
- return found_texts
25
-
26
- def get_citing_sentences(content):
27
- content_new = re.sub(r'[\n]+', ' ', content) # keep only one \n
28
- content_new = re.sub(r'e\.g\.' , 'eg', content_new)
29
- content_new = re.sub(r'i\.e\.' , 'eg', content_new)
30
- content_new = re.sub(r'etc\.' , 'etc', content_new)
31
- content_new = re.sub(r' +', ' ', content_new)
32
- sentences = [sentence + '.' for sentence in content_new.split('.')]
33
- citing_sentences = [s for s in sentences if '\\cite' in s]
34
- results = {}
35
- for s in citing_sentences:
36
- citations = retrieve_text_cite(s, 'cite')
37
- final_citations = []
38
- for cite in citations:
39
- final_citations.extend(cite.split(','))
40
- results[s] = final_citations
41
- return results
42
-
43
- def get_intro(content):
44
- sections = retrieve_text_cite(content, 'section')
45
- if sections == []:
46
- return ''
47
- try_intro = [x for x in sections if x.strip().lower() == 'introduction']
48
- if try_intro == []:
49
- return ''
50
- else:
51
- to_find = try_intro[0]
52
- ind = sections.index(to_find)
53
- if ind + 1 < len(sections):
54
- start_marker = f'\\section{{{sections[ind]}}}'
55
- end_marker = f'\\section{{{sections[ind+1]}}}'
56
- start_point = content.find(start_marker)
57
- end_point = content.find(end_marker)
58
- return content[start_point+len(start_marker):end_point]
59
- else:
60
- return ''
61
-
62
- def get_related_works(content):
63
- sections = retrieve_text_cite(content, 'section')
64
- if sections == []:
65
- return ''
66
- possible_related = [
67
- "Literature Review",
68
- "Related Work",
69
- "Related Works",
70
- "Prior Work",
71
- "Prior Works",
72
- "Related Research",
73
- "Research Overview",
74
- "Previous Work",
75
- "Previous Works",
76
- "Review of the Literature",
77
- "Review of Related Literature",
78
- "Survey of Related Work",
79
- "Survey of Related Works",
80
- "Background",
81
- "Research Background",
82
- "Review of Prior Research",
83
- "Literature Survey",
84
- "Overview of Literature",
85
- "Existing Literature",
86
- "Review of Existing Work",
87
- "Review of Existing Works",
88
- "Review of Previous Studies",
89
- "Review of Prior Literature",
90
- "Summary of Related Research",
91
- "Survey of Existing Literature",
92
- "Survey of Literature",
93
- "Existing Research Overview",
94
- "Prior Literature Review"
95
- ]
96
- possible_sections = [x for x in sections if any([True for y in possible_related if y.lower() == x.strip().lower()])]
97
- if possible_sections == []:
98
- return ''
99
- else:
100
- to_find = possible_sections[0]
101
- ind = sections.index(to_find)
102
-
103
- if ind + 1 < len(sections):
104
- start_marker = f'\\section{{{sections[ind]}}}'
105
- end_marker = f'\\section{{{sections[ind+1]}}}'
106
- start_point = content.find(start_marker)
107
- end_point = content.find(end_marker)
108
- return content[start_point+len(start_marker):end_point]
109
-
110
- else:
111
- return ''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/latexpand DELETED
@@ -1,713 +0,0 @@
1
- #!/usr/bin/perl
2
- # Inspired by latexpand by D. Musliner, University of Michigan
3
- # 2012-2023: Matthieu Moy <git@matthieu-moy.fr>
4
- # BSD License
5
-
6
- use strict;
7
- use Cwd;
8
- use Getopt::Long;
9
- use IO::Handle;
10
- use File::Spec;
11
-
12
- my $TEXINPUTS = $ENV{'TEXINPUTS'};
13
- # By default, search in current directory. We use '.' and not getcwd()
14
- # to avoid issues if the working directory contains a ':' character.
15
- if (!$TEXINPUTS) { $TEXINPUTS = '.'; }
16
-
17
- my $verbose;
18
- my $keep_comments;
19
- my $keep_includes;
20
- my $empty_comments;
21
- my $help;
22
- my $long_help;
23
- my %defines = ();
24
- my $output;
25
- my $explain;
26
- my $show_graphics;
27
- my $graphics_extensions = ":.pdf:.png:.jpg:.eps";
28
- my $expand_usepackage;
29
- my $expand_bbl;
30
- my $biber;
31
- my $fatal;
32
- my $version;
33
- my $makeatletter;
34
- my $inside_import;
35
- my $in_enc = "bytes";
36
- my $out_enc = "bytes";
37
-
38
- GetOptions (
39
- 'h' => \$help,
40
- 'help' => \$long_help,
41
- 'verbose|v' => \$verbose,
42
- 'keep-comments' => \$keep_comments,
43
- 'keep-includes' => \$keep_includes,
44
- 'empty-comments' => \$empty_comments,
45
- 'define|d=s%' => \%defines,
46
- 'output|o=s' => \$output,
47
- 'explain' => \$explain,
48
- 'show-graphics' => \$show_graphics,
49
- 'graphics-extensions' => \$graphics_extensions,
50
- 'expand-usepackage' => \$expand_usepackage,
51
- 'expand-bbl=s' => \$expand_bbl,
52
- 'biber=s' => \$biber,
53
- 'fatal' => \$fatal,
54
- 'version' => \$version,
55
- 'makeatletter' => \$makeatletter,
56
- 'in-encoding=s' => \$in_enc,
57
- 'out-encoding=s' => \$out_enc,
58
- ) or pod2usage_wrapper(2);
59
- version() if $version;
60
- pod2usage_wrapper(0) if $help;
61
- pod2usage_wrapper(-exitstatus => 0, -output => \*STDOUT, -verbose => 2) if $long_help;
62
-
63
- sub pod2usage_wrapper
64
- {
65
- # Like pod2usage, but fall back to a simpler implem in case
66
- # pod2usage can't be found.
67
- if (eval {require Pod::Usage;1;} ne 1) {
68
- print "Please install perldoc and Pod::Usage to get proper help.\n";
69
- my $started = 0;
70
- open (my $in, '<', "$0") or die $!;
71
- while (<$in>) {
72
- if ($started) {
73
- print;
74
- }
75
- if (/^__END__$/) {
76
- $started = 1;
77
- }
78
- }
79
- } else {
80
- Pod::Usage->import();
81
- pod2usage(@_);
82
- }
83
- }
84
-
85
- sub get_version
86
- {
87
- # $VERSION's value will be substituted by 'make dist', but the
88
- # next line won't (the string has to be broken to avoid it).
89
- my $VERSION = 'v1.7.2';
90
- if ($VERSION eq '@LATEXPAND' . '_VERSION@') {
91
- my($vol,$dir,$file) = File::Spec->splitpath($0);
92
- chdir($dir);
93
- $VERSION = `git describe --tags HEAD 2>/dev/null`;
94
- }
95
- if ($VERSION eq '') {
96
- $VERSION = '<unknown version>';
97
- }
98
- $VERSION =~ s/^\s+|\s+$//g;
99
- return $VERSION;
100
- }
101
-
102
- sub version
103
- {
104
- print "latexpand version ". get_version() .".\n";
105
- exit(0);
106
- }
107
-
108
- my $nl = "";
109
- if ($empty_comments) {
110
- $nl = "%\n";
111
- }
112
-
113
- if ($output && $output ne "-") {
114
- open (my $OUTPUT, '>', "$output") or die $!;
115
- STDOUT->fdopen(\*$OUTPUT, 'w') or die $!;
116
- }
117
-
118
- sub say
119
- {
120
- if ($verbose) {
121
- print STDERR "$_[0]";
122
- }
123
- }
124
-
125
- my $makeatletter_found;
126
- my $in_preamble;
127
-
128
- use open IN => ":$in_enc", OUT => ":$out_enc";
129
-
130
- foreach my $file (@ARGV)
131
- {
132
- say "processing $file\n";
133
- $makeatletter_found = 0;
134
- $in_preamble = 1;
135
- $inside_import = "";
136
- if ($file =~ /\.bib$/) {
137
- warn "WARNING: latexpand is not meant to be used on BibTeX files like '$file'.\n" .
138
- " Run latexpand on your main .tex file, using '--expand-bbl FILE'\n" .
139
- " or '--biber FILE' if needed to inline the generated bbl file.\n";
140
- } elsif (not $file =~ /\.tex$/) {
141
- warn "WARNING: latexpand is meant to be used on .tex files, which $file isn't.\n";
142
- }
143
- process_file($file, " ");
144
- }
145
-
146
- sub cat_file
147
- {
148
- my $file = shift;
149
- open (my $INFILE, "<", $file) || die "could not open input file '$file'\n";
150
- while (<$INFILE>) {
151
- print;
152
- }
153
- close ($INFILE);
154
- }
155
-
156
- sub process_file
157
- {
158
- my $file = shift;
159
- my $prefix = (shift || "");
160
- my $in_comment = 0;
161
- open(my $FILE, "<", $file) or die "could not open input file '$file'\n";
162
- my $commented_newline = 0;
163
- while (my $line = <$FILE>) {
164
- if ($line =~ /^[ \t]*\\endinput/) {
165
- # Surprisingly, text after \endinput on the
166
- # same line is kept in output. Also, add a
167
- # space (before %), automatically inserted by
168
- # TeX at the end of file.
169
- $line =~ s/\\endinput(.*)\n?/$1 % /;
170
- $in_comment = 1;
171
- process_line($line, $prefix, \$commented_newline);
172
- last;
173
- }
174
- while (my ($k, $v) = each (%defines))
175
- {
176
- $line=~s!\\$k!$v!g;
177
- }
178
- process_line($line, $prefix, \$commented_newline, $file);
179
- if ($line =~ /^%.*[^\n]\z/ || $line =~ /[^\\]%.*[^\n]\z/) {
180
- # file ends with a comment not ending with a newline
181
- print "\n";
182
- }
183
- # Garbage at end of line after \end{document} is
184
- # ignored by LaTeX, but we don't allow anything before
185
- # to avoid e.g. \verb|\end{document}| from terminating
186
- # the file.
187
- if (!$keep_comments && $line =~ /^[ \t]*\\end\{document\}/) {
188
- last;
189
- }
190
- }
191
- close($FILE);
192
- return $in_comment;
193
- }
194
-
195
- sub process_line
196
- {
197
- my ($line, $prefix, $commented_newline, $file) = @_;
198
- $_ = $line;
199
- if ($$commented_newline) {
200
- # Leading whitespaces after a comment is ignored.
201
- # There's no space in:
202
- # Line 1%
203
- # Line 2.
204
- # Match just space and tabs (\s would match \n)
205
- s/^[ \t]*//;
206
- if (/^$/) {
207
- # Deal with:
208
- #
209
- # Line 1 % comment
210
- #
211
- # Line 2
212
- #
213
- # The newline after Line 1 is commented, but we still
214
- # want a new paragraph. We strip the comment together
215
- # with its newline, but re-add a newline to chnge
216
- # paragraph here if needed:
217
- print "\n";
218
- }
219
- }
220
- $$commented_newline = 0;
221
- # Consider \makeatletter only in preamble, because we do want
222
- # to warn on \someCommand{\makeatletter\command@with@arobase}.
223
- if ($in_preamble && /^[^%]*\\makeatletter/) {
224
- $makeatletter_found = 1;
225
- }
226
- if ($in_preamble && /^[^%]*\\makeatother/) {
227
- $makeatletter_found = 0;
228
- }
229
- my $command;
230
- if (!$makeatletter && !$makeatletter_found
231
- && (($command) = /^[^%]*(\\[[:alpha:]]*@[[:alpha:]]*)/)
232
- && ($command ne '\@')) {
233
- print STDERR "Warning: command $command containing @ found in\n";
234
- print STDERR "Warning: $file.\n";
235
- print STDERR "Warning: consider using --makeatletter if the result is not compilable.\n";
236
- }
237
-
238
- # non-comment is a sequence of:
239
- # - escaped character (\\.), including \% and \\
240
- # - neither '%' nor '\'.
241
- my $NON_COMMENT = '([^\\\\%]|\\\\.)*';
242
-
243
- unless ($keep_comments) {
244
- # Special-case for \url{} commands, which may contain '%'
245
- # characters. It's hard to catch them in $NON_COMMENT since we'd
246
- # need a regexp so that "\url{foo" can't match as non-comment in
247
- # the line \url{foo%bar}, but "\url{foo%bar}" would match.
248
- # Escaping these '%' is not mandatory, but allowed, hence we can
249
- # pre-process the line by escaping them, and let latexpand work
250
- # as normal afterwards.
251
- # Known limitation: latexpand doesn't do balanced braces
252
- # recognition, and just refuses both { and } within \url{}
253
- # argument for %-detection to work ([^{}%] below). Fix should be
254
- # possible using
255
- # https://stackoverflow.com/questions/15301708/perl-regular-expression-match-nested-brackets
256
- # but is it worth the trouble? (file an issue or send a merge
257
- # request if you think it is)
258
-
259
- # While there are \url{URL} with unescaped % in URL ...
260
- my $NON_PERCENT = '([^\\}]%|[^{}%])*';
261
- while (/^(?<before>.*\\url\{)(?<url>$NON_PERCENT[^\\}]%$NON_PERCENT)(?<after>\}.*)$/) {
262
- my ($before, $url, $after) = ($+{before}, $+{url}, $+{after});
263
- # escape unescaped % in URL, if any
264
- $url =~ s/([^\\])%/$1\\%/g;
265
- $_ = $before . $url . $after ."\n";
266
- }
267
- if (!$empty_comments) {
268
- # Include \n in pattern to avoid matching
269
- # comments at end of files
270
-
271
- # remove comments + whitespace-only lines completely
272
- if (s/^\s*%.*\n//) {
273
- $$commented_newline = 1;
274
- }
275
-
276
- # Special-case commands at end of line. We
277
- # don't want "\\foo%\nbar" to become
278
- # "\\foobar" (but we still want \@% to result
279
- # in no space!)
280
- if (s/^($NON_COMMENT\\([[:alpha:]]|[[:alpha:]@]{2,}))%.*\n/$1 /) {
281
- $$commented_newline = 1;
282
- } elsif (s/^($NON_COMMENT)%.*\n/$1/) {
283
- # remove only the comment if the line has actual content
284
- $$commented_newline = 1;
285
- }
286
- }
287
- # Apply the "empty comments" treatment unconditionally
288
- # for comments not matched above (it doesn't harm to
289
- # keep an empty comment sometimes, but it may harm to
290
- # leave a real comment if the goal was to strip them).
291
- s/^(([^\\%]|\\.)*)%.*$/$1%/;
292
- }
293
-
294
- unless ($keep_includes) {
295
- # \input{foo.tex}
296
- my $ARGBRACES = '\{\\s*([^"}\\s][^}]*)(\\s*)\}';
297
- # \input{"foo bar.tex"}
298
- my $ARGQUOTED = '\{\\s*"([^"]*)"(\\s*)\}';
299
- # \input foo.tex
300
- my $ARGSPACES = '\\s([^\{\\s][^\\s]+?)\\s()';
301
- my $ARGUMENT = "\\s*?(?|$ARGBRACES|$ARGQUOTED|$ARGSPACES)";
302
-
303
- if (my ($before, $ignored, $full_filename, $trailing, $after)
304
- = /^($NON_COMMENT)\\include$ARGUMENT(.*)$/) {
305
- $full_filename = find_tex_file($full_filename . ".tex");
306
- if ($full_filename) {
307
- say $prefix . "Found include for file: $full_filename\n";
308
- print $before . $nl;
309
- print '\clearpage{}' . $nl;
310
- print "% start include $full_filename\n" if ($explain);
311
- my $in_comment = process_file($full_filename, $prefix . " ");
312
- if ($explain) {
313
- print " % end include $full_filename\n";
314
- } elsif ($in_comment) {
315
- print "\n";
316
- }
317
- print '\clearpage{}' . $nl;
318
- print $nl . $after . "\n";
319
- $_ = "";
320
- }
321
- } elsif (my ($before, $ignored, $full_filename, $trailing, $after)
322
- = /^($NON_COMMENT)\\input$ARGUMENT(.*)$/) {
323
- if ($inside_import) {
324
- $full_filename = $inside_import . $full_filename;
325
- }
326
- $full_filename = find_tex_file($full_filename, ":.tex");
327
- if ($full_filename) {
328
- say $prefix . "Found input for file: $full_filename\n";
329
- # Apparently, in some versions of LaTeX, a space
330
- # after filename in \input{foo.tex } is inserted
331
- # _before_ the inclusion. That was the case for
332
- # me when 31fa806 (deal with space after
333
- # filename in \input and \include, 2019-12-11)
334
- # was written, but is not anymore, hence we just
335
- # throw $trailing away.
336
- print $before . $nl;
337
- print "% start input $full_filename\n" if ($explain);
338
- my $in_comment = process_file($full_filename, $prefix . " ");
339
- if ($explain) {
340
- print " % end input $full_filename\n";
341
- } elsif ($in_comment) {
342
- print "\n";
343
- }
344
- if ($after =~ /[^\s]/) {
345
- # LaTeX produces this space, so let's do it also
346
- print " " . $nl . $after . "\n";
347
- } else {
348
- print " ";
349
- }
350
- $_ = "";
351
- }
352
- } elsif (my ($before, $ignored, $dir, $ignored, $full_filename, $ignored, $after)
353
- = /^($NON_COMMENT)\\(?:sub)?import$ARGUMENT$ARGUMENT(.*)$/) {
354
- if ($explain) {
355
- print "% dir " . $dir ."\n";
356
- print "% full_filename " . $full_filename ."\n";
357
- print "% after " . $after ."\n";
358
- print "% inside_import $inside_import\n";
359
- }
360
- $full_filename = $dir . $full_filename;
361
- if ($inside_import) {
362
- $full_filename = $inside_import . $full_filename;
363
- }
364
- print "% cat(inside_import,dir,full_filename) " . $full_filename ."\n" if ($explain);
365
- $full_filename = find_tex_file($full_filename, ":.tex");
366
- if ($full_filename) {
367
- say $prefix . "Found input for file: $full_filename\n";
368
- print $before . $nl;
369
- print "% start input $full_filename\n" if ($explain);
370
- my $previous_import_dir = $inside_import;
371
- $inside_import = $inside_import . $dir;
372
- my $in_comment = process_file($full_filename, $prefix . " ");
373
- $inside_import = $previous_import_dir;
374
- if ($explain) {
375
- print " % end input $full_filename\n";
376
- } elsif ($in_comment) {
377
- print "\n";
378
- }
379
- if ($after =~ /[^\s]/) {
380
- # LaTeX produces this space, so let's do it also
381
- print " " . $nl . $after . "\n";
382
- } else {
383
- print " ";
384
- }
385
- $_ = "";
386
- }
387
- } elsif (my ($before, $ignored, $args, $full_filename, $ignored, $after)
388
- = /^($NON_COMMENT)\\includegraphics(\[[^\]]*?\]|)$ARGUMENT(.*)$/) {
389
- if ($explain) {
390
- print "% inside_import " . $inside_import ."\n";
391
- print "% before " . $before ."\n";
392
- print "% ignored " . $ignored ."\n";
393
- print "% args " . $args ."\n";
394
- print "% full_filename " . $full_filename ."\n";
395
- print "% after " . $after ."\n";
396
- }
397
- if ($inside_import) {
398
- $full_filename = $inside_import . $full_filename;
399
- print "$before\\includegraphics" . "$args" . "{$full_filename}$after\n";
400
- $_ = "";
401
- }
402
- } elsif (my ($before, $ignored, $args, $full_filename, $ignored, $after)
403
- = /^($NON_COMMENT)\\lstinputlisting(\[[^\]]*?\]|)$ARGUMENT(.*)$/) {
404
- if ($explain) {
405
- print "% inside_import " . $inside_import ."\n";
406
- print "% before " . $before ."\n";
407
- print "% ignored " . $ignored ."\n";
408
- print "% args " . $args ."\n";
409
- print "% full_filename " . $full_filename ."\n";
410
- print "% after " . $after ."\n";
411
- }
412
- if ($inside_import) {
413
- $full_filename = $inside_import . $full_filename;
414
- print "$before\\lstinputlisting" . "$args" . "{$full_filename}$after\n";
415
- $_ = "";
416
- }
417
- }
418
- }
419
- if ($expand_usepackage) {
420
- # Don't bother with before and after text, we just require the
421
- # usepackage to be alone on its line.
422
- if (my ($package_name) = /^\s*\\usepackage\{([^\}]*)\}\s*(%.*)?$/) {
423
- my $full = find_file($package_name . ".sty", $TEXINPUTS);
424
- if ($full) {
425
- say $prefix . "Found package file: $full\n";
426
- process_file($full, $prefix . " ");
427
- $_ = "";
428
- # Forget about any commented newline
429
- # before the \usepackage:
430
- $$commented_newline = 0;
431
- } else {
432
- say $prefix . "Not including external package $package_name\n";
433
- }
434
- }
435
- }
436
- if ($expand_bbl) {
437
- if (my ($before, $bib_name, $after)
438
- = /^(.*)\\(?:bibliography|bibselect)\{([^\}]*)\}(.*)$/) {
439
- # The BBL file is not necessarily $bib_name.
440
- # Take it from the command-line.
441
- print $before . $nl;
442
- say $prefix . "Expanding BBL file: $expand_bbl\n";
443
- process_file($expand_bbl, $prefix . " ");
444
- print " " . $nl . $after . "\n";
445
- $_ = "";
446
- }
447
- }
448
- if ($biber) {
449
- if (my ($before, $after)
450
- = /^(.*)\\(?:addbibresource)\{[^\}]*\}(.*)$/) {
451
- # See https://tex.stackexchange.com/questions/166518/biblatex-include-bbl-problem-with-verb-field/166526#166526
452
- my $biber_noext = $biber;
453
- $biber_noext =~ s/.bbl//;
454
- print $before . $nl;
455
- say $prefix . "Expanding Biber BBL file: $biber\n";
456
- print '\begin{filecontents*}{' . $biber . '}' . "\n";
457
- cat_file($biber);
458
- print "\n";
459
- print '\end{filecontents*}
460
-
461
- \usepackage{xpatch}
462
-
463
- %Patch the biblatex input command.
464
- %replace "testinput-bbl" if you change the name above.
465
- %disable if you want to run biblatex/biber normally
466
- \makeatletter
467
- \patchcmd\blx@bblinput{\blx@blxinit}
468
- {\blx@blxinit
469
- \def\jobname{' . $biber_noext . '}%new jobname
470
- }{}{\fail}
471
- \makeatother
472
- ';
473
- say $prefix . "End expansion of Biber BBL file: $biber\n";
474
- print " " . $nl . $after . "\n";
475
- $_ = "";
476
- }
477
- }
478
- if ($show_graphics) {
479
- if (/\\includegraphics(\[[^\]]*\])?{([^}]*)}/) {
480
- my $full_filename = $2;
481
- if ($inside_import) {
482
- $full_filename = $inside_import . $full_filename;
483
- }
484
- my $full = find_tex_file($full_filename, $graphics_extensions);
485
- say $prefix . "needs graphics file: ";
486
- print STDERR "$full\n";
487
- }
488
- }
489
- if (/^[ \t]*\\begin\{document\}/) {
490
- $in_preamble = 0;
491
- if ($makeatletter) {
492
- print '\makeatletter' . $nl;
493
- }
494
- }
495
- print;
496
- }
497
-
498
- sub unquote
499
- {
500
- my $str = shift;
501
- my $x = substr($str, 0, 1);
502
- my $y = substr($str, -1, 1);
503
- if ($x eq $y && ($x eq '"' || $x eq "'")) {
504
- $str = substr($str, 1, -1);
505
- }
506
- # There's a weird LaTeX syntax: \include{"file\space
507
- # with\space spaces"}, so remove these \space when unquoting.
508
- $str =~ s/\\space / /g;
509
- return $str;
510
- }
511
-
512
- # search $1 in $TEXINPUTS, with possible extensions in $2
513
- sub find_tex_file
514
- {
515
- my $file = unquote(shift);
516
- my $extensions = (shift || ":");
517
- foreach my $ext (split(':', $extensions, -1)) {
518
- my $full = find_file_global($file . $ext);
519
- if ($full) {
520
- return $full;
521
- }
522
- }
523
- if ($fatal) {
524
- die "ERROR: Could not find file [$file]\n";
525
- } else {
526
- print STDERR "Warning: Could not find file [$file]\n";
527
- return;
528
- }
529
- }
530
-
531
- sub find_file_global
532
- {
533
- my $file = shift;
534
- if (open(my $fh, "-|", "kpsewhich", $file)) {
535
- my $full = <$fh>;
536
- $full =~ s/\s+$//;
537
- close($fh);
538
- if ($full) {
539
- return $full;
540
- }
541
- }
542
- # Should be useless, but fall-back in case kpsewhich fails (or is not installed, or ...):
543
- return find_file($file, $TEXINPUTS);
544
- }
545
-
546
- # Find files, not searching for global files (to allow not expanding global .sty packages)
547
- sub find_file
548
- {
549
- my ($file, $path) = @_;
550
- if (File::Spec->file_name_is_absolute($file)) {
551
- if (-e "$file" && ! -d "$file") {
552
- return $file;
553
- } else {
554
- return;
555
- }
556
- }
557
-
558
- # TEXINPUTS=...: (trailing :) means "append default search
559
- # directories". We don't want global directories here, but
560
- # still add . that may be needed.
561
- if (substr($path, -1) eq ':') {
562
- $path .= '.';
563
- }
564
- foreach my $dir (split(':', $path)) {
565
- if (-e "$dir/$file" && ! -d "$dir/$file") {
566
- return("$dir/$file");
567
- }
568
- }
569
- return;
570
- }
571
-
572
-
573
- __END__
574
-
575
- =head1 NAME
576
-
577
- latexpand - Flatten LaTeX file by expanding \include and \input, ... and remove comments
578
-
579
- =head1 SYNOPSIS
580
-
581
- latexpand [options] FILE...
582
-
583
- =head2 Options:
584
-
585
- --verbose show what's going on
586
- --keep-comments don't strip comments (comments are lines
587
- starting with %, and anything below
588
- \end{document})
589
- --empty-comments keep empty comments (i.e. % at end of lines) for clarity
590
- --keep-includes don't expand \input and \include directives
591
- --expand-usepackage
592
- Expand \usepackage{...} directives if the
593
- corresponding .sty file is found in
594
- $TEXINPUTS (or the current directory if
595
- $TEXINPUTS is not set)
596
- --expand-bbl FILE
597
- Expand the bibliography by inlining FILE
598
- (should be a *.bbl file)
599
- --biber FILE Include \bibliography{} with FILE's content,
600
- as needed by biblatex with the biber backend.
601
- (similar to --expand-bbl FILE, but for
602
- biber+biblatex).
603
- --help this help message
604
- --define <key>=<val>, -d <key>=<val>
605
- defines a macro key to be replaced by value, e.g.,
606
- when called with -d foo=bar would replace all occurences
607
- of \foo in the code with bar. Can be supplied multiple times.
608
- --output <file>, -o <file>
609
- generate output in <file>
610
- --explain generate explanatory comments in output
611
- --show-graphics show included graphics
612
- --graphics_extensions
613
- colon-separated list of possible graphics extensions
614
- (used by --show-graphics to find the actual graphics files)
615
- --fatal Die in case a file can't be found.
616
- --makeatletter Insert a \makeatletter in the preamble. In some
617
- rare cases it may break your document, but it
618
- may help fixing bad interactions between
619
- @-commands and inclusion (see BUGS section).
620
- --in-encoding FMT, --out-encoding FMT
621
- File encoding used by input and output files.
622
- This uses the same syntax as PerlIO's layers.
623
- Example:
624
- --in-encoding 'encoding(UTF-8)'
625
- The default is 'bytes' and should always work.
626
-
627
- =head1 USES
628
-
629
- The most common use of latexpand is to simplify distribution of source
630
- LaTeX files, typically to satisfy the requirement of editors and
631
- archival sites (springer, arXiv.org, ...) who force the authors to
632
- submit sources. One does not necessarily want to submit sources with
633
- comments, and uploading a document made of several files including
634
- each other is a bit painful. By default, latexpand answers both
635
- problems by outputing a single LaTeX file that contain no comment.
636
-
637
- =head1 GETTING LATEXPAND
638
-
639
- The latest version of latexpand is available here:
640
-
641
- https://gitlab.com/latexpand/latexpand
642
-
643
- Versions are uploaded to ctan.org from time to time:
644
-
645
- http://www.ctan.org/pkg/latexpand
646
-
647
- =head1 BUGS
648
-
649
- Please, report bugs on the issue tracker on the project site:
650
-
651
- https://gitlab.com/latexpand/latexpand/issues
652
-
653
- =head2 Known bugs
654
-
655
- =head3 Verbatim
656
-
657
- latexpand currently ignores \begin{verbatim} ... \end{verbatim}, and
658
- will therefore process any \include, \input, ... directives that
659
- appear within verbatim environments (while it shouldn't).
660
-
661
- LaTeX comments inside verbatim environments are also incorrectly
662
- stripped. You can use --keep-comments as a workaround to avoid this.
663
-
664
- =head3 Comment environment
665
-
666
- It would be nice to remove code between \begin{comment} and
667
- \end{comment} too if \usepackage{comment} is used.
668
-
669
- Code like
670
-
671
- foo%
672
- \begin{comment}
673
-
674
- will produce the incorrect
675
-
676
- foo\begin{comment}
677
-
678
- A workaround is to use --empty-comments when such tricky usage of the
679
- comments package is done.
680
-
681
- =head3 \makeatletter and use with transfig/xfig with \scalebox{}
682
-
683
- If \input{} or \include{} appears as argument to a command, and the
684
- file included contains \makeatletter, then after expansion, the
685
- \makeatletter and the @-command appear as argument to the command,
686
- which is forbidden because the argument is parsed (and the @-command
687
- badly tokenized) before being executed.
688
-
689
- This happens with
690
-
691
- \scalebox{ \input{file-generated-by-xfig.pdf_t} }
692
-
693
- Workaround: add \makeatletter before the scalebox manually in your
694
- code, like
695
-
696
- \makeatletter{}
697
- \scalebox{ \input{file-generated-by-xfig.pdf_t} }
698
- \makeatother{}
699
-
700
- In the case of xfig generated files, it is necessary only for the
701
- first occurrence.
702
-
703
- A more brute-force workaround is to use latexpand --makeatletter.
704
-
705
- =head1 SEE ALSO
706
-
707
- Instructions to include only the relevant .bib items (french):
708
-
709
- https://lacl.fr/~caubert/notes/portabilite-du-tex.html#dependances
710
-
711
- =head1 VERSION
712
-
713
- This is latexpand version v1.7.2.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/utils.py DELETED
@@ -1,701 +0,0 @@
1
- import sys
2
- import regex
3
- import yaml
4
- import shutil
5
- import bibtexparser
6
- from charset_normalizer import from_path
7
- from langdetect import detect
8
- import os
9
- import subprocess
10
- import numpy as np
11
- import networkx as nx
12
- import re
13
-
14
-
15
def is_venv():
    """Return True when the interpreter runs inside a virtual environment.

    Covers both the legacy virtualenv marker (``sys.real_prefix``) and the
    PEP 405 venv convention (``base_prefix`` differing from ``prefix``).
    """
    legacy_virtualenv = hasattr(sys, 'real_prefix')
    pep405_venv = hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix
    return legacy_virtualenv or pep405_venv
18
-
19
def read_yaml_file(file_path):
    """Parse a YAML file and return the loaded data.

    On a YAML syntax error the error is printed and None is returned
    (best-effort behavior, the caller is expected to cope with None).
    """
    with open(file_path, 'r') as handle:
        try:
            return yaml.safe_load(handle)
        except yaml.YAMLError as e:
            print(f"Error reading YAML file: {e}")
26
-
27
def read_tex_file(file_path):
    """Return the entire contents of a UTF-8 encoded TeX file as one string."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        return handle.read()
31
-
32
def write_tex_file(file_path, s):
    """Write string ``s`` to ``file_path`` using UTF-8, replacing any existing file."""
    with open(file_path, 'w', encoding='utf-8') as handle:
        handle.write(s)
35
-
36
def get_core(s):
    """Return the document body between \\begin{document} and the last \\end{document}."""
    open_tag = '\\begin{document}'
    close_tag = '\\end{document}'
    # First \begin{document}, last \end{document}: anything outside is preamble/trailer.
    body_start = s.find(open_tag) + len(open_tag)
    body_end = s.rfind(close_tag)
    return s[body_start:body_end]
42
-
43
-
44
def retrieve_text(text, command, keep_text=False):
    r"""Locate occurrences of '\command[..]{...}[..]' in `text`.

    Parameters
    ----------
    text : str
        LaTeX source to scan.
    command : str
        Command name (without the backslash), interpolated into a regex.
    keep_text : bool
        If True, return the brace-delimited argument of the first occurrence
        (None when there is none).  If False, remove every occurrence and
        return the modified text; when nothing but whitespace follows an
        occurrence up to the next newline, a '%' is left behind so the line
        structure of the source is preserved.

    Regex `base_pattern` used to match balanced parentheses taken from:
    https://stackoverflow.com/questions/546433/regular-expression-to-match-balanced-parentheses/35271017#35271017

    Bug fix: the original fell off the end and returned None in the
    keep_text=False case, silently discarding all the substitutions; the
    cleaned text is now returned.  keep_text=True still returns None when
    there is no match, so existing callers are unaffected.
    """
    base_pattern = (
        r'\\' + command + r"(?:\[(?:.*?)\])*\{((?:[^{}]+|\{(?1)\})*)\}(?:\[(?:.*?)\])*"
    )

    def extract_text_inside_curly_braces(text):
        """Extract text inside of {} from command string (recursive-brace aware)."""
        pattern = r"\{((?:[^{}]|(?R))*)\}"
        match = regex.search(pattern, text)
        if match:
            return match.group(1)
        else:
            return ""

    # Loops in case of nested commands that need to retain text, e.g. \red{hello \red{world}}.
    while True:
        all_substitutions = []
        has_match = False
        for match in regex.finditer(base_pattern, text):
            # In case there are only spaces or nothing up to the following newline,
            # adds a percent, not to alter the newlines.
            has_match = True

            if not keep_text:
                new_substring = ""
            else:
                temp_substring = text[match.span()[0] : match.span()[1]]
                return extract_text_inside_curly_braces(temp_substring)

            if match.span()[1] < len(text):
                next_newline = text[match.span()[1] :].find("\n")
                if next_newline != -1:
                    text_until_newline = text[
                        match.span()[1] : match.span()[1] + next_newline
                    ]
                    if (
                        not text_until_newline or text_until_newline.isspace()
                    ) and not keep_text:
                        new_substring = "%"
            all_substitutions.append((match.span()[0], match.span()[1], new_substring))

        # Apply right-to-left so earlier spans keep valid offsets.
        for start, end, new_substring in reversed(all_substitutions):
            text = text[:start] + new_substring + text[end:]

        if not keep_text or not has_match:
            break

    # keep_text=True with no match keeps the historical None; otherwise return
    # the cleaned text (previously lost).
    return None if keep_text else text
97
-
98
-
99
def reduce_linebreaks(s):
    """Collapse any run of two or more (space/tab-padded) newlines into one blank line."""
    multi_break = r'(\n[ \t]*)+(\n[ \t]*)+'
    return re.sub(multi_break, '\n\n', s)
101
-
102
-
103
def replace_percentage(s):
    """Drop a line-terminating '%' (plus trailing spaces), keeping the newline itself."""
    trailing_comment_marker = r'% *\n'
    return re.sub(trailing_comment_marker, '\n', s)
105
-
106
-
107
def reduce_spaces(s):
    """Squeeze every run of consecutive spaces down to a single space."""
    return re.sub(' {2,}| ', lambda m: ' ', s) if False else re.sub(' +', ' ', s)
109
-
110
-
111
def delete_urls(s):
    """Remove every http/https URL (any non-space run starting with 'http')."""
    url_pattern = r'http\S+'
    return re.sub(url_pattern, '', s)
113
-
114
-
115
def remove_tilde(s):
    """Tidy spacing artifacts: '~.'/' .' -> '.', '~,'/' ,' -> ',', and drop empty '{}'."""
    without_space_dot = re.sub(r'[~ ]\.', '.', s)
    without_space_comma = re.sub(r'[~ ],', ',', without_space_dot)
    return re.sub(r'{}', '', without_space_comma)
119
-
120
-
121
def remove_verbatim_words(s):
    """Strip LaTeX commands listed in configs/latex_commands.yaml from ``s``.

    The config drives several phases: literal deletions, commands with two
    or three simple brace arguments, nested-brace variants (delegated to
    remove_multargument), commands rewritten to comment markers, and finally
    the abstract environment is turned into a plain 'Abstract' heading.
    """
    with open("configs/latex_commands.yaml", "r") as stream:
        cfg = yaml.safe_load(stream)

    # One brace-delimited argument with no nested braces.
    simple_arg = r'{[^}]*}'

    for literal in cfg['verbatim_to_delete']:
        s = s.replace(literal, '')

    for name in cfg['two_arguments']:
        s = re.sub(r'\\' + name + simple_arg * 2, '', s)

    for name in cfg['three_arguments']:
        s = re.sub(r'\\' + name + simple_arg * 3, '', s)

    # "Elaborate" commands may contain nested braces, so they need a scanner.
    for name in cfg['two_arguments_elaborate']:
        s = remove_multargument(s, '\\' + name, 2)

    for name in cfg['three_arguments_elaborate']:
        s = remove_multargument(s, '\\' + name, 3)

    for name in cfg['replace_comments']:
        s = re.sub(r'\\' + name, '%', s)

    s = re.sub(
        r'\\end{[\s]*abstract[\s]*}',
        '',
        s,
        flags=re.IGNORECASE
    )

    s = re.sub(
        r'\\begin{[\s]*abstract[\s]*}',
        'Abstract\n\n',
        s,
        flags=re.IGNORECASE
    )
    return s
160
-
161
-
162
def yes_or_no(s):
    """Map an answer string to 1 (starts with 'Yes'), 0 (starts with 'No'), else -1."""
    if s[0:3] == "Yes":
        return 1
    if s[0:2] == "No":
        return 0
    return -1
164
-
165
-
166
def get_main(directory):
    """Heuristically locate the main .tex file of a LaTeX project tree.

    Builds a directed graph where an edge i -> j means file i \inputs file j,
    then picks a root (a file nobody includes) of the largest weakly connected
    component.  Ties are broken by preferring roots that contain
    \begin{document} and are detected as English, largest file winning.
    Returns None when the tree has no .tex file at all.
    """
    # Collect every file under `directory`, then keep only the .tex ones.
    file_paths = []
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            file_paths.append(file_path)
    latex_paths = [f for f in file_paths if f.endswith('.tex')]
    number_tex = len(latex_paths)
    if number_tex == 0:
        return None
    if number_tex == 1:
        return latex_paths[0]
    # adjacency[i][j] == 1  <=>  latex_paths[i] includes latex_paths[j].
    adjacency = np.zeros((number_tex, number_tex))
    keys = [os.path.basename(path) for path in latex_paths]
    # NOTE(review): the trailing '|}' alternative matches every bare '}' and
    # yields all-empty tuples; the filter on the next line discards them, so
    # it is harmless but probably unintentional.
    reg_ex = r'\\input{(.*?)}|\\include{(.*?)}|\\import{(.*?)}|\\subfile{(.*?)}|\\include[*]{(.*?)}|}'
    for i,file in enumerate(latex_paths):
        content = read_tex_file(file)
        find_pattern_input = re.findall(reg_ex, content)
        find_pattern_input = [tup for tup in find_pattern_input if not all(element == "" for element in tup)]
        number_matches = len(find_pattern_input)
        if number_matches == 0:
            continue
        else:
            # Normalize \import/\subfile/\include* into \input{...} (rewrites the
            # file on disk as a side effect), then re-scan with the simpler regex.
            content = replace_imports(file, content)
            reg_ex_clean = r'\\input{(.*?)}|\\include{(.*?)}'
            find_pattern_input = re.findall(reg_ex_clean, content)
            number_matches = len(find_pattern_input)
            for j in range(number_matches):
                match = find_pattern_input[j]
                non_empty_match = [t for t in match if t]
                for non_empty in non_empty_match:
                    # Compare by basename only; extensions are normalized to .tex.
                    base_match = os.path.basename(non_empty)
                    if not base_match.endswith('.tex'):
                        base_match = base_match + '.tex'
                    if base_match not in keys:
                        continue
                    ind = keys.index(base_match)
                    adjacency[i][ind] = 1
    G = nx.from_numpy_array(adjacency, create_using=nx.DiGraph)
    connected_components = list(nx.weakly_connected_components(G))
    size_connected = [len(x) for x in connected_components]
    maximum_size = max(size_connected)
    biggest_connected = [x for x in connected_components if len(x) == maximum_size]
    if len(biggest_connected)>1:
        # Several equally large components: a root is a node with no includers.
        roots = [n for connected in biggest_connected for n in connected if not list(G.predecessors(n))]
        _check = []
        for r in roots:
            try:
                _check.append(check_begin(latex_paths[r]))
            except Exception as e:
                # check_begin can fail (e.g. language detection); treat as "not main".
                _check.append(False)
        potentials_files = [latex_paths[x] for x, y in zip(roots, _check) if y == True]
        # Prefer the largest candidate file.
        # NOTE(review): raises ValueError when no root passes check_begin — TODO confirm intended.
        sizes_files = [os.path.getsize(x) for x in potentials_files]
        return potentials_files[sizes_files.index(max(sizes_files))]

    else:
        # Single largest component: return its first root.
        roots = [n for n in biggest_connected[0] if not list(G.predecessors(n))]
        return latex_paths[roots[0]]
224
-
225
-
226
def initial_clean(directory, config):
    """Run arxiv_latex_cleaner on `directory`, with an encoding-repair fallback.

    A backup copy of the tree is made first.  If the cleaner fails, the tree is
    restored from backup, every .tex file is re-encoded to UTF-8 via
    charset_normalizer, and the cleaner is run once more.  The cleaner writes
    its output to a sibling '<dir>_arXiv' directory, which finally replaces
    `directory` in place.

    NOTE(review): the path arithmetic assumes `directory` ends with '/' (as the
    callers in this module pass it); without the trailing slash the '_temp' and
    '_arXiv' siblings would be computed against the parent directory.
    """
    config_cmd = ''
    if config == True:
        config_cmd = '--config configs/cleaning_config.yaml'
    temp_dir = directory[:directory.rfind('/')] + '_temp' + '/'
    # Backup copy so a failed cleaning run can be rolled back.
    shutil.copytree(directory, temp_dir)
    try:
        command_res = os.system('arxiv_latex_cleaner --keep_bib {} {}'.format(directory, config_cmd))
        if command_res != 0:
            raise Exception('Error cleaning')
        else:
            shutil.rmtree(temp_dir)

    except Exception as e:
        # Roll back, normalize encodings, then retry the cleaner (best effort:
        # the retry's exit status is deliberately ignored).
        shutil.rmtree(directory)
        os.rename(temp_dir, directory)
        file_paths = []
        for root, _, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(root, file)
                file_paths.append(file_path)
        latex_paths = [f for f in file_paths if f.endswith('.tex')]
        for p in latex_paths:
            # charset_normalizer guesses the encoding; rewrite as UTF-8.
            results = from_path(p)
            with open(p, 'w', encoding='utf-8') as f:
                f.write(str(results.best()))
        os.system('arxiv_latex_cleaner --keep_bib {} {}'.format(directory, config_cmd))
    # arxiv_latex_cleaner emits its result into '<dir>_arXiv'; swap it in.
    cleaned_directory = directory[:directory.rfind('/')] + '_arXiv'
    shutil.rmtree(directory)
    os.rename(cleaned_directory, directory)
256
-
257
-
258
def check_begin(directory):
    """Return True iff the given .tex file has \\begin{document} and reads as English.

    Note: despite the parameter name, `directory` is a file path.
    """
    content = read_tex_file(directory)
    # Language detection runs unconditionally, mirroring the historical behavior
    # (it may raise on pathological content even without \begin{document}).
    is_english = detect(content) == 'en'
    if re.findall(r'\\begin{document}', content):
        return is_english
    return False
262
-
263
-
264
def post_processing(extracted_dir, file):
    """Run the full flattening/cleaning pipeline on a main .tex file.

    Steps: latexpand-merge all inputs, expand private macros (de-macro),
    resolve \\def macros, normalize operators/titles/citations, run
    arxiv_latex_cleaner, then apply the textual clean-up filters.  The final
    single-file result is written to '<extracted_dir>final_cleaned.tex' and
    that path is returned.  Each optional step is best-effort: on failure the
    pipeline simply continues with the previous intermediate file.
    """
    _dir = os.path.dirname(file) + '/'
    perl_expand(file)
    # perl_expand always writes to this fixed name in the same directory.
    file = _dir + 'merged_latexpand.tex'
    try:
        de_macro(file)
        # de-macro writes '<name>-clean.tex' on success.
        file = _dir + 'merged_latexpand-clean.tex'
    except Exception as e:
        pass
    try:
        def_handle(file)
    except Exception as e:
        pass
    try:
        declare_operator(file) # has additional add-ons
    except Exception as e:
        pass
    try:
        # Second de-macro pass: declare_operator may have rewritten
        # \DeclareMathOperator and friends into \newcommand definitions.
        de_macro(file)
        file = _dir + os.path.splitext(os.path.basename(file))[0] + '-clean' + '.tex'
    except Exception as e:
        pass
    # Two cleaner passes: first with the project config, then with defaults.
    initial_clean(_dir, config=True)
    initial_clean(_dir, config=False)
    tex_content = read_tex_file(file)
    # Innermost filter runs first: verbatim removal, then comment/linebreak/
    # tilde/URL/space normalization.
    final_tex = reduce_spaces(
        delete_urls(
            remove_tilde(
                reduce_linebreaks(
                    replace_percentage(
                        remove_verbatim_words(
                            tex_content
                        )
                    )
                )
            )
        )
    ).strip()
    # Replace the whole working tree with just the final single file.
    shutil.rmtree(extracted_dir)
    os.makedirs(extracted_dir)
    write_tex_file(extracted_dir + 'final_cleaned.tex', final_tex)
    initial_clean(extracted_dir, config=False)
    return extracted_dir + 'final_cleaned.tex'
307
-
308
-
309
def perl_expand(file):
    """Flatten `file` with the bundled latexpand Perl script.

    Copies ./src/utils/latexpand next to `file`, runs it from that directory,
    and captures stdout into 'merged_latexpand.tex' there.  Errors from perl
    are discarded (stderr -> DEVNULL) and the exit status is not checked.

    NOTE(review): the working directory is not restored if copyfile/chdir or
    the run itself raises — no try/finally here, unlike de_macro().
    """
    # Save the current working directory
    oldpwd = os.getcwd()
    target_dir = os.path.dirname(file) + '/'
    # Correctly construct the path
    target = os.path.join(target_dir, 'latexpand')
    src = './src/utils/latexpand'
    # Copy the `latexpand` script to the target directory
    shutil.copyfile(src, target)
    # Change to the target directory
    os.chdir(target_dir)

    # Run the perl command without shell=True and handle redirection within Python
    with open('merged_latexpand.tex', 'w') as output_file:
        subprocess.run(['perl', 'latexpand', os.path.basename(file)],
                       stdout=output_file, stderr=subprocess.DEVNULL)

    # Return to the original directory
    os.chdir(oldpwd)
328
-
329
-
330
def de_macro(file):
    """Expand private macros in `file` with the bundled de-macro.py script.

    Copies ./src/utils/de-macro.py next to `file`, runs it from that directory
    (de-macro writes '<name>-clean.tex'), and raises Exception when the script
    exits non-zero.  The working directory is always restored.
    """
    # Save the current working directory
    oldpwd = os.getcwd()
    target_dir = os.path.dirname(file) + '/'
    # Construct the target path
    target = os.path.join(target_dir, 'de-macro.py')
    # BUG FIX: was '.src/utils/de-macro.py' (missing separator after '.'),
    # so the copy could never find the script; sibling perl_expand() uses
    # './src/utils/...', so use the same root here.
    src = './src/utils/de-macro.py'

    # Copy the `de-macro.py` script to the target directory
    shutil.copyfile(src, target)
    # Change to the target directory
    os.chdir(target_dir)

    # Run the de-macro script and surface failures as a single Exception type.
    try:
        subprocess.run(['python3', 'de-macro.py', os.path.basename(file)],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
    except subprocess.CalledProcessError as e:
        raise Exception(f"Error de-macro: {e}") from e
    finally:
        # Always return to the original directory
        os.chdir(oldpwd)
352
-
353
-
354
def def_handle(file):
    """Rewrite \\def-style macros in `file` in place via the def_handle.py helper.

    Raises Exception when the helper exits with a non-zero status.
    """
    status = os.system(f'python3 src/utils/def_handle.py {file} --output {file}')
    if status != 0:
        raise Exception('Error def handle')
358
-
359
-
360
def declare_operator(file):
    """Normalize macro/title/citation/sectioning commands in `file`, in place.

    Rewrites \\DeclareMathOperator and starred (re)newcommand variants so a
    later de-macro pass can expand them, canonicalizes the many title and
    citation command variants, flattens all sectioning levels to \\section,
    strips the preamble via get_core, and writes the result back to `file`.

    NOTE(review): substitution order within each dict matters (starred forms
    must be rewritten before their unstarred prefixes); dicts preserve
    insertion order, so do not reorder the entries.
    """
    s = read_tex_file(file)
    ## Operators
    pattern = r'\\DeclareMathOperator'
    s = re.sub(pattern, r'\\newcommand', s)
    # Map starred/provide variants onto the plain forms de-macro understands.
    pattern = {
        r'\\newcommand\*': r'\\newcommand',
        r'\\providecommand\*': r'\\newcommand',
        r'\\providecommand': r'\\newcommand',
        r'\\renewcommand\*': r'\\renewcommand',
        r'\\newenvironment\*': r'\\newenvironment',
        r'\\renewenvironment\*': r'\\renewenvironment'
    }
    # Join '\end {x}' -> '\end{x}' before the table-driven rewrites.
    s = re.sub(r'\\end +', r'\\end', s)
    for key in pattern:
        s = re.sub(key, pattern[key], s)
    ## Title
    start = '\\begin{document}'
    beginning_doc = s.find(start)
    # Venue-specific title commands collapse onto plain \title.
    pattern = {
        r'\\icmltitlerunning\*': r'\\title',
        r'\\icmltitlerunning': r'\\title',
        r'\\inlinetitle\*': r'\\title',
        r'\\icmltitle\*': r'\\title',
        r'\\inlinetitle': r'\\title',
        r'\\icmltitle': r'\\title',
        r'\\titlerunning\*': r'\\title',
        r'\\titlerunning': r'\\title',
        r'\\toctitle': r'\\title',
        r'\\title\*': r'\\title',
        r'\\TITLE\*': r'\\title',
        r'\\TITLE': r'\\title',
        r'\\Title\*': r'\\title',
        r'\\Title': r'\\title',
    }
    for key in pattern:
        s = re.sub(key, pattern[key], s)
    find_potential = s.find('\\title')

    ## Remove \\
    # Clean the title text (drop line breaks / ties) and, when the title was
    # defined in the preamble, substitute it for \maketitle in the body.
    title_content = retrieve_text(s, 'title', keep_text = True)
    if title_content != None:
        cleaned_title = re.sub(r'\\\\', ' ', title_content)
        cleaned_title = re.sub(r'\n',' ', cleaned_title)
        cleaned_title = re.sub(r'\~',' ', cleaned_title)
        s = s.replace(title_content, cleaned_title)
        if find_potential != -1 and find_potential < beginning_doc:
            s = s.replace('\\maketitle', cleaned_title)

    ## Cite and ref commands
    # Natbib/cleveref variants onto plain \cite/\ref; theorem-like environment
    # aliases onto their canonical names.
    pattern = {
        r'\\citep\*': r'\\cite',
        r'\\citet\*': r'\\cite',
        r'\\citep': r'\\cite',
        r'\\citet': r'\\cite',
        r'\\cite\*': r'\\cite',
        r'\\citealt\*': r'\\cite',
        r'\\citealt': r'\\cite',
        r'\\citealtp\*': r'\\cite',
        r'\\citealp': r'\\cite',
        r'\\citeyear\*': r'\\cite',
        r'\\citeyear': r'\\cite',
        r'\\citeauthor\*': r'\\cite',
        r'\\citeauthor': r'\\cite',
        r'\\citenum\*': r'\\cite',
        r'\\citenum': r'\\cite',
        r'\\cref': r'\\ref',
        r'\\Cref': r'\\ref',
        r'\\factref': r'\\ref',
        r'\\appref': r'\\ref',
        r'\\thmref': r'\\ref',
        r'\\secref': r'\\ref',
        r'\\lemref': r'\\ref',
        r'\\corref': r'\\ref',
        r'\\eqref': r'\\ref',
        r'\\autoref': r'\\ref',
        r'begin{thm}': r'begin{theorem}',
        r'begin{lem}': r'begin{lemma}',
        r'begin{cor}': r'begin{corollary}',
        r'begin{exm}': r'begin{example}',
        r'begin{defi}': r'begin{definition}',
        r'begin{rem}': r'begin{remark}',
        r'begin{prop}': r'begin{proposition}',
        r'end{thm}': r'end{theorem}',
        r'end{lem}': r'end{lemma}',
        r'end{cor}': r'end{corollary}',
        r'end{exm}': r'end{example}',
        r'end{defi}': r'end{definition}',
        r'end{rem}': r'end{remark}',
        r'end{prop}': r'end{proposition}',
    }

    for key in pattern:
        s = re.sub(key, pattern[key], s)


    # Flatten every sectioning level (chapters, subsections, custom variants)
    # down to \section.
    pattern = {
        r'subsubsection': r'section',
        r'subsubsection ': r'section',
        r'subsubsection\*': r'section',
        r'subsubsection\* ': r'section',
        r'subsection': r'section',
        r'subsection ': r'section',
        r'subsection\*': r'section',
        r'subsection\* ': r'section',
        r'section ': r'section',
        r'section\*': r'section',
        r'section\* ': r'section',
        r'chapter': r'section',
        r'chapter ': r'section',
        r'chapter\*': r'section',
        r'chapter\* ': r'section',
        r'mysubsubsection': r'section',
        r'mysubsection': r'section',
        r'mysection': r'section',
    }

    for key in pattern:
        s = re.sub(key, pattern[key], s)

    # In case any new commands for appendix/appendices
    s = re.sub(r'newcommand{\\appendix}', '', s)
    s = re.sub(r'newcommand{\\appendices}', '', s)
    s = get_core(s)

    ## In case of double titles being defined
    title_content = retrieve_text(s, 'title', keep_text = True)
    if title_content != None:
        cleaned_title = re.sub(r'\\\\', ' ', title_content)
        cleaned_title = re.sub(r'\n',' ', cleaned_title)
        cleaned_title = re.sub(r'\~',' ', cleaned_title)
        s = s.replace(title_content, cleaned_title)
    write_tex_file(file, s)
493
-
494
-
495
def replace_imports(file, s):
    """Rewrite \\import/\\subfile/\\include* in ``s`` as plain \\input{...}.

    The normalized text is written back to ``file`` and also returned.
    """
    # \import{dir}{name} -> \input{dirname} (the two arguments concatenate).
    s = re.sub(r'\\import{(.*?)}{(.*?)}', r"\\input{\1\2}", s)
    s = re.sub(r'\\subfile{(.*?)}', r"\\input{\1}", s)
    s = re.sub(r'\\include[*]{(.*?)}', r"\\input{\1}", s)
    write_tex_file(file, s)
    return s
504
-
505
-
506
def remove_multargument(s, target, k):
    """Delete every occurrence of ``target`` together with its k brace-balanced arguments.

    Scans forward from each occurrence, counting matched '{'/'}' pairs; one
    argument is complete whenever opens == closes, and scanning stops after k
    arguments.  The command and everything scanned is cut from ``s``.
    """
    hit = s.find(target)
    while hit != -1:
        scan_from = hit + len(target)
        opens = 0
        closes = 0
        args_seen = 0
        for offset, ch in enumerate(s[scan_from:]):
            if ch == '{':
                opens += 1
            if ch == '}':
                closes += 1
            # A balanced group just closed: that's one full argument consumed.
            if opens != 0 and closes != 0 and opens == closes:
                args_seen += 1
                opens = 0
                closes = 0
                if args_seen == k:
                    break
        s = s[:hit] + s[scan_from + offset + 1:]
        hit = s.find(target)
    return s
528
-
529
-
530
def fix_citations(s):
    """Collapse the natbib citation-command zoo onto plain \\cite.

    Starred forms are listed before their unstarred prefixes so the '*' is
    consumed in the same substitution — keep the order.
    """
    replacements = (
        (r'\\citep\*', r'\\cite'),
        (r'\\citet\*', r'\\cite'),
        (r'\\citep', r'\\cite'),
        (r'\\citet', r'\\cite'),
        (r'\\cite\*', r'\\cite'),
        (r'\\citealt\*', r'\\cite'),
        (r'\\citealt', r'\\cite'),
        (r'\\citealtp\*', r'\\cite'),
        (r'\\citealp', r'\\cite'),
        (r'\\citeyear\*', r'\\cite'),
        (r'\\citeyear', r'\\cite'),
        (r'\\citeauthor\*', r'\\cite'),
        (r'\\citeauthor', r'\\cite'),
        (r'\\citenum\*', r'\\cite'),
        (r'\\citenum', r'\\cite'),
    )
    for old, new in replacements:
        s = re.sub(old, new, s)
    return s
551
-
552
def find_bib(directory):
    """Recursively collect the paths of all .bib files under ``directory``."""
    all_files = []
    for root, _, files in os.walk(directory):
        for name in files:
            all_files.append(os.path.join(root, name))
    return [p for p in all_files if p.endswith('.bib')]
560
-
561
def create_bib_from_bbl(bibfile):
    """Parse a tex2bib-generated .bib file into {citation_key: fields} dicts.

    tex2bib stores each formatted .bbl entry in a 'note' field; this function
    additionally tries to recover a normalized lowercase title from that note
    (titles are conventionally quoted with ``...'' or delimited by the two
    \\newblock separators).

    NOTE(review): entries without a 'note' field raise KeyError here — TODO
    confirm tex2bib always emits one.
    """
    with open(bibfile, 'r') as f:
        content = f.read()
    library_raw = bibtexparser.parse_string(content)
    library = {}
    for block in library_raw.blocks:
        # Skip parse failures, duplicates and comments — only real entries count.
        if isinstance(
            block,
            (bibtexparser.model.DuplicateBlockKeyBlock, bibtexparser.model.ParsingFailedBlock, bibtexparser.model.ImplicitComment)
        ):
            continue
        fields = {}
        for field in block.fields:
            fields[field.key] = field.value

        ## Get a good title one ##
        field_content = fields["note"]
        field_content = field_content.replace("\n", " ")
        field_content = re.sub(" +", " ", field_content)
        # Case 1: the title is already quoted ``like this'' in the note.
        if field_content.find("``") != -1 and field_content.find("\'\'") != -1:
            # Strip emphasis commands, braces, quotes and punctuation, lowercase.
            title = (
                field_content[field_content.find("``") + 2 : field_content.find("\'\'")]
                .replace("\\emph", "")
                .replace("\\emp", "")
                .replace("\\em", "")
                .replace(",", "")
                .replace("{", "")
                .replace("}","")
                .replace("``", "")
                .replace("\'\'", "")
                .strip(".")
                .strip()
                .strip(".")
                .lower()
            )
            fields['title'] = title
        else:
            # Case 2: exactly two \newblock separators — the title sits between
            # them, so synthesize the quotes and re-run the same extraction.
            if field_content.count("\\newblock") == 2:
                field_content = field_content.replace("\\newblock", "``", 1)
                field_content = field_content.replace("\\newblock", "\'\'", 1)
                if field_content.find("``") != -1 and field_content.find("\'\'") != -1:
                    title = (
                        field_content[field_content.find("``") + 2 : field_content.find("\'\'")]
                        .replace("\\emph", "")
                        .replace("\\emp", "")
                        .replace("\\em", "")
                        .replace(",", "")
                        .replace("{", "")
                        .replace("}","")
                        .replace("``", "")
                        .replace("\'\'", "")
                        .strip(".")
                        .strip()
                        .strip(".")
                        .lower()
                    )
                    fields['title'] = title
        library[block.key] = fields
    return library
620
-
621
-
622
def create_bib(bibfile):
    """Parse a .bib file into a {citation_key: fields} dict.

    Field values are stripped of braces; titles are additionally normalized
    (whitespace collapsed, emphasis commands/commas/quotes removed, trimmed,
    lowercased) to make cross-source title matching robust.  Entries without
    a title are skipped entirely.
    """
    with open(bibfile, 'r') as f:
        content = f.read()
    library_raw = bibtexparser.parse_string(content)

    library = {}
    for block in library_raw.blocks:
        # Skip parse failures, duplicates and comments — only real entries count.
        if isinstance(
            block,
            (bibtexparser.model.DuplicateBlockKeyBlock, bibtexparser.model.ParsingFailedBlock, bibtexparser.model.ImplicitComment)
        ):
            continue
        fields = {}
        for field in block.fields:
            fields[field.key] = field.value.replace('{', '').replace('}', '')
            if field.key == 'title':
                title = re.sub(r'[\n]+', ' ', field.value) # keep only one \n
                title = re.sub(r' +', ' ', title)
                fields[field.key] = (
                    title.replace("\\emph", "")
                    .replace("\\emp", "")
                    .replace("\\em", "")
                    .replace(",", "")
                    .replace("{", "")
                    .replace("}", "")
                    .strip(".")
                    .strip()
                    .strip(".")
                    .lower()
                )
        # Title-less entries are useless for matching downstream; drop them.
        if 'title' not in fields:
            continue
        library[block.key] = fields
    return library
656
-
657
-
658
def find_bbl(directory):
    """Recursively collect the paths of all .bbl files under ``directory``."""
    all_files = []
    for root, _, files in os.walk(directory):
        for name in files:
            all_files.append(os.path.join(root, name))
    return [p for p in all_files if p.endswith('.bbl')]
666
-
667
-
668
def textobib(file):
    """Convert a .bbl file to .bib with the tex2bib Perl helper.

    Copies ./tex2bib next to ``file``, runs it there, restores the working
    directory, and returns the path of the generated .bib file.
    """
    previous_dir = os.getcwd()
    work_dir = os.path.dirname(file) + '/'
    helper_copy = work_dir + 'tex2bib'
    shutil.copyfile('./tex2bib', helper_copy)
    os.chdir(work_dir)
    bib_name = os.path.splitext(os.path.basename(file))[0] + '.bib'
    os.system(f'perl tex2bib -i {os.path.basename(file)} -o {bib_name}')
    os.chdir(previous_dir)
    return work_dir + bib_name
679
-
680
-
681
def get_library_bib(bib_files):
    """Parse every .bib file and merge the entries into one dict keyed by citation key.

    Later files win on duplicate keys (dict.update semantics, in input order).
    """
    merged = {}
    for bib_path in bib_files:
        merged.update(create_bib(bib_path))
    return merged
689
-
690
-
691
def get_library_bbl(bbl_files):
    """Convert every .bbl to .bib, parse them, and merge into one dict.

    Conversion of all files happens before any parsing (matching the original
    two-phase behavior); later files win on duplicate keys.
    """
    converted = [textobib(bbl_path) for bbl_path in bbl_files]
    merged = {}
    for bib_path in converted:
        merged.update(create_bib_from_bbl(bib_path))
    return merged