|
|
import os |
|
|
import uuid |
|
|
import gradio as gr |
|
|
import rebiber |
|
|
|
|
|
|
|
|
# Directory of the installed rebiber package. NOTE: replace() deliberately keeps
# the trailing path separator (unlike os.path.dirname); presumably rebiber's
# start_dir is concatenated with relative filenames — confirm before "cleaning up".
PACKAGE_ROOT = os.path.abspath(rebiber.__file__).replace("__init__.py", "")

# Database mapping paper titles to official BibTeX entries, built from every
# .bib file listed in the bib_list.txt bundled with the rebiber package.
bib_db = rebiber.construct_bib_db(
    os.path.join(PACKAGE_ROOT, "bib_list.txt"), start_dir=PACKAGE_ROOT
)

# Venue-name abbreviation table, loaded from abbr.tsv in the current working
# directory (NOTE(review): relies on CWD being the app directory — verify).
abbr_dict = rebiber.normalize.load_abbr_tsv("abbr.tsv")
|
|
|
|
|
|
|
|
def process(input_bib: str,
            shorten: bool,
            remove_keys: list[str],
            deduplicate: bool,
            sort_ids: bool):
    """Normalize a BibTeX string with rebiber and write the result to disk.

    Args:
        input_bib: Raw BibTeX text pasted by the user.
        shorten: Abbreviate venue names using the loaded ``abbr_dict``.
        remove_keys: Field names (e.g. ``"url"``, ``"doi"``) to strip.
        deduplicate: Drop duplicate entries.
        sort_ids: Sort entries alphabetically by citation key.

    Returns:
        A ``(normalized_bib, run_id, button_update)`` triple. When the input
        does not look like BibTeX, returns ``("N/A", "", hidden-button)``.
    """
    # Cheap sanity check: every BibTeX entry starts with '@'.
    if "@" not in input_bib:
        # BUG FIX: was gr.Button.update(visible=False), which was removed in
        # Gradio 4. gr.update() is the supported form (used on the success
        # path below already).
        return "N/A", "", gr.update(visible=False)

    # Unique id per request so concurrent users don't clobber each other's files.
    run_id = uuid.uuid4().hex
    in_file = f"input_{run_id}.bib"
    out_file = f"output_{run_id}.bib"

    # Tabs are flattened to spaces before parsing (rebiber's parser is
    # whitespace-sensitive). Explicit UTF-8 so non-ASCII author names survive.
    with open(in_file, "w", encoding="utf-8") as f:
        f.write(input_bib.replace("\t", " "))

    entries = rebiber.load_bib_file(in_file)
    # Entries are in memory now; delete the input temp file so repeated runs
    # don't accumulate files on disk.
    os.remove(in_file)

    rebiber.normalize_bib(
        bib_db,
        entries,
        out_file,
        abbr_dict=abbr_dict if shorten else [],
        deduplicate=deduplicate,
        sort=sort_ids,
        removed_value_names=remove_keys,
    )

    # out_file is intentionally kept on disk: download_file() serves it later,
    # looked up by run_id.
    with open(out_file, encoding="utf-8") as f:
        # Cosmetic whitespace normalization of rebiber's output indentation.
        output_bib = f.read().replace("\n ", "\n ")

    return output_bib, run_id, gr.update(visible=True)
|
|
|
|
|
|
|
|
def download_file(run_id: str):
    """Surface the normalized .bib written by process() for download.

    The path is reconstructed from the run id, and the file component is
    made visible alongside it.
    """
    return f"output_{run_id}.bib", gr.update(visible=True)
|
|
|
|
|
|
|
|
|
|
|
# Pre-filled demo entry for the input textbox: an arXiv preprint that rebiber
# can map to its official venue version.
EXAMPLE = """
@article{lin2020birds,
    title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
    author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
    journal={arXiv preprint arXiv:2005.00683},
    year={2020}
}
"""
|
|
|
|
|
# Gradio UI: input column (BibTeX text + normalization options) on the left,
# normalized output plus download controls on the right.
with gr.Blocks(title="Rebiber", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# **Rebiber** – normalise those BibTeX entries!

🐼 [Project page](https://yuchenlin.xyz/) |
:octocat: [GitHub](https://github.com/yuchenlin/rebiber) |
🐤 [Tweet](https://twitter.com/billyuchenlin/status/1353850378438070272)

Rebiber replaces arXiv citations with their official venue versions (DBLP / ACL
Anthology), deduplicates, sorts, and can abbreviate conference names.
"""
    )

    with gr.Row():

        with gr.Column(scale=3):
            input_bib = gr.Textbox(
                label="Input BibTeX",
                value=EXAMPLE,
                lines=15,
                interactive=True,
            )
            removekeys = gr.CheckboxGroup(
                ["url", "biburl", "address", "publisher",
                 "pages", "doi", "volume", "bibsource"],
                label="Remove fields",
                info="Select the keys you’d like to strip",
            )
            shorten = gr.Checkbox(label="Abbreviate venue names")
            dedup = gr.Checkbox(label="Deduplicate entries")
            sort = gr.Checkbox(label="Sort IDs alphabetically")

            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Submit")

            # Hidden carrier for the per-request uuid; it names the output
            # file that download_file() serves later.
            run_uuid = gr.Textbox(visible=False)

        with gr.Column(scale=3):
            output_box = gr.Textbox(
                label="Normalised BibTeX",
                interactive=False,
                show_copy_button=True,
            )
            # BUG FIX: label read "Generate . bib file" (stray space).
            download_btn = gr.Button("Generate .bib file", visible=False)
            download_file_component = gr.File(visible=False)

    submit_btn.click(
        process,
        inputs=[input_bib, shorten, removekeys, dedup, sort],
        outputs=[output_box, run_uuid, download_btn],
        api_name="process",
    )
    # NOTE(review): download_file returns (path, visibility-update) and both
    # values are routed to the same gr.File component — confirm this is the
    # intended wiring rather than a copy-paste of the wrong component.
    download_btn.click(download_file, run_uuid, [download_file_component, download_file_component])
    clear_btn.click(lambda: "", None, input_bib)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the Gradio server only when run as a script (not on import).
    demo.launch()