Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import bibtexparser | |
| import re | |
| import os | |
# Module-level bibtexparser parser: common string definitions are switched
# on and non-standard entry types are not ignored.
custom_config = bibtexparser.bparser.BibTexParser(
    common_strings=True,
    ignore_nonstandard_types=False,
)
# Abbreviations that is_location_abbreviation() treats as places when they
# appear brace-wrapped in a booktitle segment. The categories overlap
# ("LA", "SA" and "EU" each appear in two of them); the union below
# collapses the repeats.
_COUNTRIES_AND_REGIONS = {
    "USA", "UK", "UAE", "EU", "NZ", "SA", "RSA", "USSR", "PRC",
}
_US_STATES = {
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
    "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
    "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
    "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
    "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY",
}
_CANADIAN_PROVINCES = {
    "AB", "BC", "MB", "NB", "NL", "NS", "NT", "NU", "ON", "PE",
    "QC", "SK", "YT",
}
# New York City, Los Angeles, San Francisco, Chicago, D.C., London,
# Hong Kong, Taipei, Tokyo, Sydney
_CITIES = {
    "NYC", "LA", "SF", "CHI", "DC", "LDN",
    "HK", "TPE", "TOK", "SYD",
}
_INTERNATIONAL_BODIES = {"UN", "NATO", "ASEAN", "OPEC"}
# North America, South America, Europe, Asia, Africa, Oceania, Antarctica
_CONTINENTS = {"NA", "SA", "EU", "AS", "AF", "OC", "AN"}

LOCATION_ABBREVIATIONS = (
    _COUNTRIES_AND_REGIONS
    | _US_STATES
    | _CANADIAN_PROVINCES
    | _CITIES
    | _INTERNATIONAL_BODIES
    | _CONTINENTS
)
def is_location_abbreviation(segment: str) -> bool:
    """
    Report whether any brace-wrapped word in the segment is a known location.

    Only braces that enclose a single run of word characters (for example
    ``{USA}``) are inspected, and the lookup in LOCATION_ABBREVIATIONS is
    case-sensitive.
    """
    braced_words = re.findall(r"\{(\w+)\}", segment)
    return any(word in LOCATION_ABBREVIATIONS for word in braced_words)
def simplify_booktitle(booktitle: str, year: str = None) -> str:
    """
    Simplify a booktitle down to its essential parts.

    The booktitle is split on commas and then:
    1. The first segment is always kept.
    2. A later segment is kept only if it contains both '{' and '}' and
       is_location_abbreviation() does not flag it.
    3. If a year is provided, that substring is removed from every kept
       segment; whitespace runs left behind are collapsed to one space and
       segments that end up empty are dropped.

    Args:
        booktitle: Raw booktitle field value.
        year: Year string to strip from the result. None or an empty string
            disables year removal.

    Returns:
        The kept segments joined with ', '.
    """
    first, *rest = booktitle.split(',')
    kept = [first.strip()]
    kept.extend(
        segment.strip()
        for segment in rest
        if '{' in segment and '}' in segment
        and not is_location_abbreviation(segment)
    )

    if year:
        # Cutting the year out of the middle of a segment leaves two adjacent
        # spaces, and a segment that was only the year becomes empty; tidy
        # both so the joined title has no doubled spaces or dangling ", ".
        without_year = (
            ' '.join(segment.replace(year, '').split()) for segment in kept
        )
        kept = [segment for segment in without_year if segment]

    return ', '.join(kept)
def simplify_bibtex(bibtex_str: str) -> str:
    """
    Parse a BibTeX string, trim each supported entry, and serialize it back.

    For entries of type book, inproceedings, article, misc or incollection:
    the booktitle (when present) is shortened with simplify_booktitle(), using
    the entry's year if it has one, and every field outside the desired set
    is deleted. Entries of any other type are left untouched.

    Args:
        bibtex_str: BibTeX source text containing zero or more entries.

    Returns:
        The resulting database rendered by bibtexparser.dumps().
    """
    # Build a fresh parser for every call. A bibtexparser 1.x BibTexParser
    # owns a single BibDatabase and appends to it on each parse, so sharing
    # one parser across calls would return entries from earlier inputs too.
    parser = bibtexparser.bparser.BibTexParser(common_strings=True)
    parser.ignore_nonstandard_types = False
    bib_database = bibtexparser.loads(bibtex_str, parser=parser)

    handled_types = ('book', 'inproceedings', 'article', 'misc', 'incollection')
    desired_fields = (
        'author', 'title', 'journal', 'booktitle', 'volume', 'pages', 'year',
        'ENTRYTYPE', 'ID',
    )

    for entry in bib_database.entries:
        if entry['ENTRYTYPE'] not in handled_types:
            continue

        if 'booktitle' in entry:
            entry['booktitle'] = simplify_booktitle(
                entry['booktitle'], entry.get('year')
            )

        # Collect the keys first: deleting while iterating the dict directly
        # would raise a RuntimeError.
        unwanted = [key for key in entry if key not in desired_fields]
        for key in unwanted:
            del entry[key]

    return bibtexparser.dumps(bib_database)
def gradio_wrapper(bibtex_str: str) -> str:
    """Gradio callback: return the simplified form of the submitted BibTeX."""
    return simplify_bibtex(bibtex_str)
# Build the Gradio UI: a 20-line BibTeX input box wired to gradio_wrapper,
# with the result shown in a box labelled "Simplified BibTeX".
# NOTE(review): gr.inputs / gr.outputs look like Gradio's legacy component
# namespaces - confirm they exist in the Gradio version this app runs on.
bibtex_input = gr.inputs.Textbox(lines=20, placeholder="Enter your BibTeX here...")
simplified_output = gr.outputs.Textbox(label="Simplified BibTeX")
interface = gr.Interface(
    fn=gradio_wrapper,
    inputs=bibtex_input,
    outputs=simplified_output,
)
# Start the app as soon as this module is executed.
interface.launch()