"""
Build BibTeX string from fetched metadata (ground truth).
Single central builder: dispatches by source and formats one consistent style.
"""
from typing import Any, List, Optional
import re
def _escape(s: str) -> str:
"""Escape BibTeX special chars: \\ { }"""
if not s:
return ""
return s.replace("\\", "\\\\").replace("{", "\\{").replace("}", "\\}")
def _author_list_to_bibtex(authors: Any) -> str:
"""Convert authors (list or str) to BibTeX author field (Name1 and Name2)."""
if isinstance(authors, str):
return _escape(authors.strip())
if isinstance(authors, list):
return " and ".join(_escape(str(a).strip()) for a in authors if a)
return ""
def _first_author_last_name(authors: Any) -> str:
"""Get last name (last word) of first author for key generation."""
if isinstance(authors, str):
parts = authors.strip().split()
return parts[-1] if parts else "unknown"
if isinstance(authors, list) and authors:
first = str(authors[0]).strip()
parts = first.split()
return parts[-1] if parts else "unknown"
return "unknown"
def _bibtex_key(authors: Any, year: str) -> str:
    """Generate a safe BibTeX key: LastNameYear.

    authors: passed to _first_author_last_name to obtain the name part.
    year: publication year; any value is accepted and converted with str().
        Only its ASCII digits are kept, truncated to four.

    The name part keeps ASCII letters and digits only. If nothing is left,
    the key starts with "ref". If the year holds no digits (missing, None,
    "n.d.", ...), the year part is "nodate".
    """
    last = re.sub(r"[^a-zA-Z0-9]", "", _first_author_last_name(authors))
    # str() tolerates a non-string year (e.g. an int); `or ""` maps None to empty.
    digits = re.sub(r"[^0-9]", "", str(year or ""))
    # Apply the fallback AFTER digit filtering: filtering the literal "nodate"
    # would erase it and leave the key without any year part.
    y = digits[:4] or "nodate"
    return f"{last}{y}" if last else f"ref{y}"
def build_fetched_bibtex(source: str, result: Any) -> str:
    """
    Build a BibTeX entry string from fetched metadata.

    source: 'arxiv' | 'crossref' | 'scholar' | 'semantic_scholar' | 'openalex' | 'dblp'
    result: the fetcher result object (ArxivMetadata, CrossRefResult, etc.).
        Fields are read with getattr, so a missing or falsy attribute counts
        as empty.

    Returns the formatted entry, or "" when source is not one of the names above.
    The entry always has author, title, year ("?" when unknown) and a
    "Fetched from <source>" note; journal, doi and url are added only when
    non-empty.
    """
    # Per-source spec: (url attr, doi attr, venue attr,
    #                   entry type when a venue is present, entry type otherwise).
    # None means that field is not read for the source and stays empty.
    source_specs = {
        "arxiv": ("abs_url", "doi", "journal_ref", "article", "misc"),
        "crossref": ("url", "doi", "container_title", "article", "article"),
        "scholar": ("url", None, None, "misc", "misc"),
        "semantic_scholar": ("url", None, None, "misc", "misc"),
        "openalex": ("url", "doi", None, "misc", "misc"),
        "dblp": ("url", "doi", None, "misc", "misc"),
    }
    # The isinstance guard keeps an unhashable source a quiet "" instead of a TypeError.
    spec = source_specs.get(source) if isinstance(source, str) else None
    if spec is None:
        return ""
    url_attr, doi_attr, venue_attr, type_with_venue, type_without_venue = spec

    def _read(attr: Optional[str], default: Any = "") -> Any:
        """Return result.<attr>, mapping a missing or falsy value to default."""
        if attr is None:
            return default
        return getattr(result, attr, default) or default

    title = _read("title")
    authors = _read("authors", [])
    # The year may arrive as an int from some fetchers (assumption: the result
    # types are not visible here). Normalise to str because key generation
    # calls string methods on it.
    year = str(_read("year"))
    doi = _read(doi_attr)
    url = _read(url_attr)
    venue = _read(venue_attr)
    entry_type = type_with_venue if venue else type_without_venue

    key = _bibtex_key(authors, year)
    fields = [
        f" author = {{{_author_list_to_bibtex(authors)}}}",
        f" title = {{{_escape(title)}}}",
        f" year = {{{year or '?'}}}",
    ]
    # Optional fields, in fixed output order; empty ones are omitted.
    optional = (("journal", venue), ("doi", doi), ("url", url))
    fields.extend(
        f" {name} = {{{_escape(value)}}}" for name, value in optional if value
    )
    fields.append(f" note = {{Fetched from {source}}}")
    return f"@{entry_type}{{{key},\n" + ",\n".join(fields) + "\n}"
|