import os
import re
import shutil
import subprocess
import sys

import bibtexparser
import networkx as nx
import numpy as np
import regex
import yaml
from charset_normalizer import from_path
from langdetect import detect
def is_venv():
    # True when running inside a virtual environment (old virtualenv or venv).
    return (hasattr(sys, 'real_prefix') or
            (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix))


def read_yaml_file(file_path):
    with open(file_path, 'r') as file:
        try:
            data = yaml.safe_load(file)
            return data
        except yaml.YAMLError as e:
            print(f"Error reading YAML file: {e}")


def read_tex_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        tex_content = file.read()
    return tex_content


def write_tex_file(file_path, s):
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(s)


def get_core(s):
    # Keep only the material between \begin{document} and \end{document}.
    start = '\\begin{document}'
    end = '\\end{document}'
    beginning_doc = s.find(start)
    end_doc = s.rfind(end)
    return s[beginning_doc + len(start):end_doc]


def retrieve_text(text, command, keep_text=False):
    """Removes '\\command{*}' from the string 'text'.

    Regex `base_pattern` used to match balanced parentheses taken from:
    https://stackoverflow.com/questions/546433/regular-expression-to-match-balanced-parentheses/35271017#35271017
    """
    base_pattern = (
        r'\\' + command + r"(?:\[(?:.*?)\])*\{((?:[^{}]+|\{(?1)\})*)\}(?:\[(?:.*?)\])*"
    )

    def extract_text_inside_curly_braces(text):
        """Extract text inside of {} from command string"""
        pattern = r"\{((?:[^{}]|(?R))*)\}"
        match = regex.search(pattern, text)
        if match:
            return match.group(1)
        else:
            return ""

    # Loops in case of nested commands that need to retain text,
    # e.g. \red{hello \red{world}}.
    while True:
        all_substitutions = []
        has_match = False
        for match in regex.finditer(base_pattern, text):
            has_match = True
            if not keep_text:
                new_substring = ""
            else:
                temp_substring = text[match.span()[0]:match.span()[1]]
                return extract_text_inside_curly_braces(temp_substring)
            # If there are only spaces (or nothing) up to the following
            # newline, substitute a percent sign so the newlines survive.
            if match.span()[1] < len(text):
                next_newline = text[match.span()[1]:].find("\n")
                if next_newline != -1:
                    text_until_newline = text[
                        match.span()[1]:match.span()[1] + next_newline
                    ]
                    if (
                        not text_until_newline or text_until_newline.isspace()
                    ) and not keep_text:
                        new_substring = "%"
            all_substitutions.append((match.span()[0], match.span()[1], new_substring))
        for start, end, new_substring in reversed(all_substitutions):
            text = text[:start] + new_substring + text[end:]
        if not keep_text or not has_match:
            break
    # The original fell off the end without returning anything: hand back the
    # cleaned text when deleting commands, and None when no argument text
    # was found (callers test the keep_text result against None).
    return text if not keep_text else None
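
# A minimal usage sketch (hypothetical strings, not from the source):
#   retrieve_text(r'Drop \red{this} note.', 'red')                  -> 'Drop  note.'
#   retrieve_text(r'Keep \red{this} note.', 'red', keep_text=True)  -> 'this'
# With keep_text=True the brace argument of the first match is returned;
# with keep_text=False every '\command{...}' span is deleted from the text.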


def reduce_linebreaks(s):
    # Collapse runs of blank (possibly whitespace-only) lines into one blank line.
    return re.sub(r'(\n[ \t]*)+(\n[ \t]*)+', '\n\n', s)


def replace_percentage(s):
    # Drop LaTeX comment markers left dangling at the end of a line.
    return re.sub(r'% *\n', '\n', s)


def reduce_spaces(s):
    return re.sub(' +', ' ', s)


def delete_urls(s):
    return re.sub(r'http\S+', '', s)


def remove_tilde(s):
    # Drop a stray tilde/space before '.' or ',' and remove leftover empty braces.
    s1 = re.sub(r'[~ ]\.', '.', s)
    s2 = re.sub(r'[~ ],', ',', s1)
    return re.sub(r'{}', '', s2)
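
# Quick sanity checks for the normalisers above (hypothetical strings):
#   reduce_linebreaks('a\n\n\n\nb')          -> 'a\n\nb'
#   replace_percentage('text %  \n')         -> 'text \n'
#   reduce_spaces('a    b')                  -> 'a b'
#   delete_urls('see http://arxiv.org now')  -> 'see  now'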


def remove_verbatim_words(s):
    with open("configs/latex_commands.yaml", "r") as stream:
        read_config = yaml.safe_load(stream)
    for command in read_config['verbatim_to_delete']:
        s = s.replace(command, '')
    for command in read_config['two_arguments']:
        pattern = r'\\' + command + r'{[^}]*}' + r'{[^}]*}'
        s = re.sub(pattern, '', s)
    for command in read_config['three_arguments']:
        pattern = r'\\' + command + r'{[^}]*}' + r'{[^}]*}' + r'{[^}]*}'
        s = re.sub(pattern, '', s)
    for command in read_config['two_arguments_elaborate']:
        s = remove_multargument(s, '\\' + command, 2)
    for command in read_config['three_arguments_elaborate']:
        s = remove_multargument(s, '\\' + command, 3)
    for command in read_config['replace_comments']:
        pattern = r'\\' + command
        s = re.sub(pattern, '%', s)
    s = re.sub(
        r'\\end{[\s]*abstract[\s]*}',
        '',
        s,
        flags=re.IGNORECASE
    )
    s = re.sub(
        r'\\begin{[\s]*abstract[\s]*}',
        'Abstract\n\n',
        s,
        flags=re.IGNORECASE
    )
    return s


def yes_or_no(s):
    return 1 if s[0:3] == "Yes" else 0 if s[0:2] == "No" else -1
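
# E.g. yes_or_no('Yes, because...') -> 1, yes_or_no('No.') -> 0,
# and anything else (e.g. 'Maybe') -> -1.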


def get_main(directory):
    # Collect every file under `directory` and keep the .tex ones.
    file_paths = []
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            file_paths.append(file_path)
    latex_paths = [f for f in file_paths if f.endswith('.tex')]
    number_tex = len(latex_paths)
    if number_tex == 0:
        return None
    if number_tex == 1:
        return latex_paths[0]
    # Build the adjacency matrix of the \input/\include graph between files.
    adjacency = np.zeros((number_tex, number_tex))
    keys = [os.path.basename(path) for path in latex_paths]
    reg_ex = r'\\input{(.*?)}|\\include{(.*?)}|\\import{(.*?)}|\\subfile{(.*?)}|\\include[*]{(.*?)}'
    for i, file in enumerate(latex_paths):
        content = read_tex_file(file)
        find_pattern_input = re.findall(reg_ex, content)
        find_pattern_input = [tup for tup in find_pattern_input if not all(element == "" for element in tup)]
        number_matches = len(find_pattern_input)
        if number_matches == 0:
            continue
        # Normalise \import/\subfile/\include* to \input, then re-scan.
        content = replace_imports(file, content)
        reg_ex_clean = r'\\input{(.*?)}|\\include{(.*?)}'
        find_pattern_input = re.findall(reg_ex_clean, content)
        number_matches = len(find_pattern_input)
        for j in range(number_matches):
            match = find_pattern_input[j]
            non_empty_match = [t for t in match if t]
            for non_empty in non_empty_match:
                base_match = os.path.basename(non_empty)
                if not base_match.endswith('.tex'):
                    base_match = base_match + '.tex'
                if base_match not in keys:
                    continue
                ind = keys.index(base_match)
                adjacency[i][ind] = 1
    G = nx.from_numpy_array(adjacency, create_using=nx.DiGraph)
    connected_components = list(nx.weakly_connected_components(G))
    size_connected = [len(x) for x in connected_components]
    maximum_size = max(size_connected)
    biggest_connected = [x for x in connected_components if len(x) == maximum_size]
    if len(biggest_connected) > 1:
        # Several candidate components: keep roots (files nobody includes)
        # that contain \begin{document}, then pick the largest file.
        roots = [n for connected in biggest_connected for n in connected if not list(G.predecessors(n))]
        _check = []
        for r in roots:
            try:
                _check.append(check_begin(latex_paths[r]))
            except Exception:
                _check.append(False)
        potentials_files = [latex_paths[x] for x, y in zip(roots, _check) if y]
        sizes_files = [os.path.getsize(x) for x in potentials_files]
        return potentials_files[sizes_files.index(max(sizes_files))]
    roots = [n for n in biggest_connected[0] if not list(G.predecessors(n))]
    return latex_paths[roots[0]]
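
# A sketch of the intended call, assuming an unpacked arXiv source tree
# (the path below is hypothetical):
#   main_tex = get_main('papers/2101.00001/')
# The heuristic: build the directed \input/\include graph over the .tex
# files, take the largest weakly connected component, and return a root
# file (one nobody includes), preferring roots that pass check_begin.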


def initial_clean(directory, config):
    config_cmd = ''
    if config:
        config_cmd = '--config configs/cleaning_config.yaml'
    # Keep a backup copy so a failed cleaning pass can be rolled back.
    temp_dir = directory[:directory.rfind('/')] + '_temp' + '/'
    shutil.copytree(directory, temp_dir)
    try:
        command_res = os.system('arxiv_latex_cleaner --keep_bib {} {}'.format(directory, config_cmd))
        if command_res != 0:
            raise Exception('Error cleaning')
        shutil.rmtree(temp_dir)
    except Exception:
        # Restore the backup, re-encode every .tex file as UTF-8, and retry.
        shutil.rmtree(directory)
        os.rename(temp_dir, directory)
        file_paths = []
        for root, _, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(root, file)
                file_paths.append(file_path)
        latex_paths = [f for f in file_paths if f.endswith('.tex')]
        for p in latex_paths:
            results = from_path(p)
            with open(p, 'w', encoding='utf-8') as f:
                f.write(str(results.best()))
        os.system('arxiv_latex_cleaner --keep_bib {} {}'.format(directory, config_cmd))
    # arxiv_latex_cleaner writes its output to '<directory>_arXiv'.
    cleaned_directory = directory[:directory.rfind('/')] + '_arXiv'
    shutil.rmtree(directory)
    os.rename(cleaned_directory, directory)


def check_begin(directory):
    # A candidate main file must contain \begin{document} and be in English.
    content = read_tex_file(directory)
    english = detect(content) == 'en'
    return english if re.findall(r'\\begin{document}', content) else False


def post_processing(extracted_dir, file):
    _dir = os.path.dirname(file) + '/'
    # Flatten the project into a single file, then expand user macros.
    perl_expand(file)
    file = _dir + 'merged_latexpand.tex'
    try:
        de_macro(file)
        file = _dir + 'merged_latexpand-clean.tex'
    except Exception:
        pass
    try:
        def_handle(file)
    except Exception:
        pass
    try:
        declare_operator(file)  # has additional add-ons
    except Exception:
        pass
    try:
        de_macro(file)
        file = _dir + os.path.splitext(os.path.basename(file))[0] + '-clean' + '.tex'
    except Exception:
        pass
    initial_clean(_dir, config=True)
    initial_clean(_dir, config=False)
    tex_content = read_tex_file(file)
    final_tex = reduce_spaces(
        delete_urls(
            remove_tilde(
                reduce_linebreaks(
                    replace_percentage(
                        remove_verbatim_words(
                            tex_content
                        )
                    )
                )
            )
        )
    ).strip()
    # Replace the extracted sources with the single cleaned file.
    shutil.rmtree(extracted_dir)
    os.makedirs(extracted_dir)
    write_tex_file(extracted_dir + 'final_cleaned.tex', final_tex)
    initial_clean(extracted_dir, config=False)
    return extracted_dir + 'final_cleaned.tex'


def perl_expand(file):
    # Save the current working directory.
    oldpwd = os.getcwd()
    target_dir = os.path.dirname(file) + '/'
    target = os.path.join(target_dir, 'latexpand')
    src = './src/utils/latexpand'
    # Copy the `latexpand` script next to the file and run it from there.
    shutil.copyfile(src, target)
    os.chdir(target_dir)
    # Run perl without shell=True and handle the redirection within Python.
    with open('merged_latexpand.tex', 'w') as output_file:
        subprocess.run(['perl', 'latexpand', os.path.basename(file)],
                       stdout=output_file, stderr=subprocess.DEVNULL)
    # Return to the original directory.
    os.chdir(oldpwd)


def de_macro(file):
    # Save the current working directory.
    oldpwd = os.getcwd()
    target_dir = os.path.dirname(file) + '/'
    target = os.path.join(target_dir, 'de-macro.py')
    src = './src/utils/de-macro.py'
    # Copy the `de-macro.py` script next to the file and run it from there.
    shutil.copyfile(src, target)
    os.chdir(target_dir)
    # Run the de-macro script without os.system and surface errors.
    try:
        subprocess.run(['python3', 'de-macro.py', os.path.basename(file)],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
    except subprocess.CalledProcessError as e:
        raise Exception(f"Error de-macro: {e}") from e
    finally:
        # Always return to the original directory.
        os.chdir(oldpwd)


def def_handle(file):
    h = os.system('python3 src/utils/def_handle.py {} --output {}'.format(file, file))
    if h != 0:
        raise Exception('Error def handle')


def declare_operator(file):
    s = read_tex_file(file)
    ## Operators
    pattern = r'\\DeclareMathOperator'
    s = re.sub(pattern, r'\\newcommand', s)
    pattern = {
        r'\\newcommand\*': r'\\newcommand',
        r'\\providecommand\*': r'\\newcommand',
        r'\\providecommand': r'\\newcommand',
        r'\\renewcommand\*': r'\\renewcommand',
        r'\\newenvironment\*': r'\\newenvironment',
        r'\\renewenvironment\*': r'\\renewenvironment'
    }
    s = re.sub(r'\\end +', r'\\end', s)
    for key in pattern:
        s = re.sub(key, pattern[key], s)
    ## Title
    start = '\\begin{document}'
    beginning_doc = s.find(start)
    pattern = {
        r'\\icmltitlerunning\*': r'\\title',
        r'\\icmltitlerunning': r'\\title',
        r'\\inlinetitle\*': r'\\title',
        r'\\icmltitle\*': r'\\title',
        r'\\inlinetitle': r'\\title',
        r'\\icmltitle': r'\\title',
        r'\\titlerunning\*': r'\\title',
        r'\\titlerunning': r'\\title',
        r'\\toctitle': r'\\title',
        r'\\title\*': r'\\title',
        r'\\TITLE\*': r'\\title',
        r'\\TITLE': r'\\title',
        r'\\Title\*': r'\\title',
        r'\\Title': r'\\title',
    }
    for key in pattern:
        s = re.sub(key, pattern[key], s)
    find_potential = s.find('\\title')
    ## Remove '\\' and layout characters from the title text.
    title_content = retrieve_text(s, 'title', keep_text=True)
    if title_content is not None:
        cleaned_title = re.sub(r'\\\\', ' ', title_content)
        cleaned_title = re.sub(r'\n', ' ', cleaned_title)
        cleaned_title = re.sub(r'\~', ' ', cleaned_title)
        s = s.replace(title_content, cleaned_title)
        if find_potential != -1 and find_potential < beginning_doc:
            s = s.replace('\\maketitle', cleaned_title)
    ## Cite and ref commands
    pattern = {
        r'\\citep\*': r'\\cite',
        r'\\citet\*': r'\\cite',
        r'\\citep': r'\\cite',
        r'\\citet': r'\\cite',
        r'\\cite\*': r'\\cite',
        r'\\citealt\*': r'\\cite',
        r'\\citealt': r'\\cite',
        r'\\citealtp\*': r'\\cite',
        r'\\citealp': r'\\cite',
        r'\\citeyear\*': r'\\cite',
        r'\\citeyear': r'\\cite',
        r'\\citeauthor\*': r'\\cite',
        r'\\citeauthor': r'\\cite',
        r'\\citenum\*': r'\\cite',
        r'\\citenum': r'\\cite',
        r'\\cref': r'\\ref',
        r'\\Cref': r'\\ref',
        r'\\factref': r'\\ref',
        r'\\appref': r'\\ref',
        r'\\thmref': r'\\ref',
        r'\\secref': r'\\ref',
        r'\\lemref': r'\\ref',
        r'\\corref': r'\\ref',
        r'\\eqref': r'\\ref',
        r'\\autoref': r'\\ref',
        r'begin{thm}': r'begin{theorem}',
        r'begin{lem}': r'begin{lemma}',
        r'begin{cor}': r'begin{corollary}',
        r'begin{exm}': r'begin{example}',
        r'begin{defi}': r'begin{definition}',
        r'begin{rem}': r'begin{remark}',
        r'begin{prop}': r'begin{proposition}',
        r'end{thm}': r'end{theorem}',
        r'end{lem}': r'end{lemma}',
        r'end{cor}': r'end{corollary}',
        r'end{exm}': r'end{example}',
        r'end{defi}': r'end{definition}',
        r'end{rem}': r'end{remark}',
        r'end{prop}': r'end{proposition}',
    }
    for key in pattern:
        s = re.sub(key, pattern[key], s)
    ## Normalise every sectioning command to \section.
    pattern = {
        r'subsubsection': r'section',
        r'subsubsection ': r'section',
        r'subsubsection\*': r'section',
        r'subsubsection\* ': r'section',
        r'subsection': r'section',
        r'subsection ': r'section',
        r'subsection\*': r'section',
        r'subsection\* ': r'section',
        r'section ': r'section',
        r'section\*': r'section',
        r'section\* ': r'section',
        r'chapter': r'section',
        r'chapter ': r'section',
        r'chapter\*': r'section',
        r'chapter\* ': r'section',
        r'mysubsubsection': r'section',
        r'mysubsection': r'section',
        r'mysection': r'section',
    }
    for key in pattern:
        s = re.sub(key, pattern[key], s)
    # In case any new commands were defined for appendix/appendices.
    s = re.sub(r'newcommand{\\appendix}', '', s)
    s = re.sub(r'newcommand{\\appendices}', '', s)
    s = get_core(s)
    ## In case a second title is defined inside the document body.
    title_content = retrieve_text(s, 'title', keep_text=True)
    if title_content is not None:
        cleaned_title = re.sub(r'\\\\', ' ', title_content)
        cleaned_title = re.sub(r'\n', ' ', cleaned_title)
        cleaned_title = re.sub(r'\~', ' ', cleaned_title)
        s = s.replace(title_content, cleaned_title)
    write_tex_file(file, s)


def replace_imports(file, s):
    # Rewrite \import, \subfile and \include* as plain \input and persist.
    regex_p1 = r'\\import{(.*?)}{(.*?)}'
    s = re.sub(regex_p1, r"\\input{\1\2}", s)
    regex_p2 = r'\\subfile{(.*?)}'
    s = re.sub(regex_p2, r"\\input{\1}", s)
    regex_p3 = r'\\include[*]{(.*?)}'
    s = re.sub(regex_p3, r"\\input{\1}", s)
    write_tex_file(file, s)
    return s


def remove_multargument(s, target, k):
    # Delete every '\target{...}{...}' invocation with k brace-delimited
    # arguments, counting braces so that nested groups are handled.
    ind = s.find(target)
    while ind != -1:
        start_ind = ind + len(target)
        stack_open = 0
        stack_close = 0
        track_arg = 0
        for i, char in enumerate(s[start_ind:]):
            if char == '{':
                stack_open += 1
            if char == '}':
                stack_close += 1
            if stack_open != 0 and stack_close != 0:
                if stack_open == stack_close:
                    # One balanced {...} argument finished.
                    track_arg += 1
                    stack_open = 0
                    stack_close = 0
                    if track_arg == k:
                        break
        s = s[:ind] + s[start_ind + i + 1:]
        ind = s.find(target)
    return s
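
# Example with a hypothetical two-argument macro:
#   remove_multargument(r'A \pair{x}{y} B', '\\pair', 2)  -> 'A  B'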


def fix_citations(s):
    pattern = {
        r'\\citep\*': r'\\cite',
        r'\\citet\*': r'\\cite',
        r'\\citep': r'\\cite',
        r'\\citet': r'\\cite',
        r'\\cite\*': r'\\cite',
        r'\\citealt\*': r'\\cite',
        r'\\citealt': r'\\cite',
        r'\\citealtp\*': r'\\cite',
        r'\\citealp': r'\\cite',
        r'\\citeyear\*': r'\\cite',
        r'\\citeyear': r'\\cite',
        r'\\citeauthor\*': r'\\cite',
        r'\\citeauthor': r'\\cite',
        r'\\citenum\*': r'\\cite',
        r'\\citenum': r'\\cite'
    }
    for key in pattern:
        s = re.sub(key, pattern[key], s)
    return s
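
# E.g. fix_citations(r'see \citep{a} and \citet*{b}')
#   -> 'see \cite{a} and \cite{b}'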


def find_bib(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            file_paths.append(file_path)
    bib_paths = [f for f in file_paths if f.endswith('.bib')]
    return bib_paths


def _clean_title_fragment(fragment):
    # Shared title normalisation for entries recovered from .bbl files;
    # factored out of the two identical chains in the original code.
    return (
        fragment
        .replace("\\emph", "")
        .replace("\\emp", "")
        .replace("\\em", "")
        .replace(",", "")
        .replace("{", "")
        .replace("}", "")
        .replace("``", "")
        .replace("\'\'", "")
        .strip(".")
        .strip()
        .strip(".")
        .lower()
    )


def create_bib_from_bbl(bibfile):
    with open(bibfile, 'r') as f:
        content = f.read()
    library_raw = bibtexparser.parse_string(content)
    library = {}
    for block in library_raw.blocks:
        if isinstance(
            block,
            (bibtexparser.model.DuplicateBlockKeyBlock, bibtexparser.model.ParsingFailedBlock, bibtexparser.model.ImplicitComment)
        ):
            continue
        fields = {}
        for field in block.fields:
            fields[field.key] = field.value
        ## Recover a usable title from the 'note' field.
        # .get avoids a KeyError on entries without a note (defensive fix).
        field_content = fields.get("note", "")
        field_content = field_content.replace("\n", " ")
        field_content = re.sub(" +", " ", field_content)
        if field_content.find("``") == -1 or field_content.find("\'\'") == -1:
            # No quoted title: when exactly two \newblock markers are present,
            # treat the text between them as the title.
            if field_content.count("\\newblock") == 2:
                field_content = field_content.replace("\\newblock", "``", 1)
                field_content = field_content.replace("\\newblock", "\'\'", 1)
        if field_content.find("``") != -1 and field_content.find("\'\'") != -1:
            fields['title'] = _clean_title_fragment(
                field_content[field_content.find("``") + 2:field_content.find("\'\'")]
            )
        library[block.key] = fields
    return library


def create_bib(bibfile):
    with open(bibfile, 'r') as f:
        content = f.read()
    library_raw = bibtexparser.parse_string(content)
    library = {}
    for block in library_raw.blocks:
        if isinstance(
            block,
            (bibtexparser.model.DuplicateBlockKeyBlock, bibtexparser.model.ParsingFailedBlock, bibtexparser.model.ImplicitComment)
        ):
            continue
        fields = {}
        for field in block.fields:
            fields[field.key] = field.value.replace('{', '').replace('}', '')
            if field.key == 'title':
                title = re.sub(r'[\n]+', ' ', field.value)  # collapse newlines into spaces
                title = re.sub(r' +', ' ', title)
                fields[field.key] = (
                    title.replace("\\emph", "")
                    .replace("\\emp", "")
                    .replace("\\em", "")
                    .replace(",", "")
                    .replace("{", "")
                    .replace("}", "")
                    .strip(".")
                    .strip()
                    .strip(".")
                    .lower()
                )
        # Skip entries without a title; they cannot be matched later.
        if 'title' not in fields:
            continue
        library[block.key] = fields
    return library


def find_bbl(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            file_paths.append(file_path)
    bbl_paths = [f for f in file_paths if f.endswith('.bbl')]
    return bbl_paths


def textobib(file):
    # Convert a .bbl file to .bib with the tex2bib perl script.
    oldpwd = os.getcwd()
    target_dir = os.path.dirname(file) + '/'
    target = target_dir + 'tex2bib'
    src = './tex2bib'
    shutil.copyfile(src, target)
    os.chdir(target_dir)
    output_file = os.path.splitext(os.path.basename(file))[0] + '.bib'
    os.system('perl tex2bib -i {} -o {}'.format(os.path.basename(file), output_file))
    os.chdir(oldpwd)
    return target_dir + output_file


def get_library_bib(bib_files):
    # Merge all .bib files into a single {citation_key: fields} dictionary.
    library = []
    for bib_file in bib_files:
        library.append(create_bib(bib_file))
    final_library = {}
    for d in library:
        final_library.update(d)
    return final_library


def get_library_bbl(bbl_files):
    # Convert each .bbl to .bib first, then merge as above.
    bib_files = []
    for bbl_file in bbl_files:
        bib_files.append(textobib(bbl_file))
    library = []
    for bib_file in bib_files:
        library.append(create_bib_from_bbl(bib_file))
    final_library = {}
    for d in library:
        final_library.update(d)
    return final_library
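

if __name__ == '__main__':
    # A minimal end-to-end sketch under stated assumptions: the directory is
    # hypothetical, and the external tools the helpers shell out to
    # (arxiv_latex_cleaner, latexpand, de-macro.py, def_handle.py, tex2bib)
    # must be installed for the full pipeline to succeed.
    extracted_dir = 'papers/2101.00001/'
    main_tex = get_main(extracted_dir)
    if main_tex is not None and check_begin(main_tex):
        cleaned_path = post_processing(extracted_dir, main_tex)
        # Prefer .bib files; fall back to .bbl conversion when none exist.
        library = get_library_bib(find_bib(extracted_dir)) or \
            get_library_bbl(find_bbl(extracted_dir))
        print(cleaned_path, '-', len(library), 'references')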