# Spaces: Running on CPU Upgrade
import re
# Filter names, in the order every per-source flag map lists them.
_FILTER_NAMES = (
    "authors",
    "types",
    "tags",
    "paper_filter",
    "year",
    "journal",
    "citations",
)


def _filter_flags(*enabled):
    """Return a fresh flag map where only the filters in *enabled* are True."""
    return {name: name in enabled for name in _FILTER_NAMES}


# Per-source boolean flags keyed by filter name. ``active_filters`` ORs the
# maps of the selected sources together.
# NOTE(review): True presumably means "this source supports the filter" --
# confirm against the UI code that consumes these flags.
SOURCE_FILTERS = {
    "Stacks Project": _filter_flags("types", "tags"),
    "arXiv": _filter_flags(*_FILTER_NAMES),
    "ProofWiki": _filter_flags("types"),
    "An Infinitely Large Napkin": _filter_flags("types", "paper_filter"),
    "CRing Project": _filter_flags("types", "paper_filter"),
    "HoTT Book": _filter_flags("types"),
    "Open Logic Project": _filter_flags("types"),
}
def active_filters(selected_sources):
    """Combine the flag maps of the selected sources into a single map.

    The result has the same keys as the first entry of ``SOURCE_FILTERS``.
    A flag ends up truthy when at least one selected source sets it.
    Sources that are not in ``SOURCE_FILTERS`` contribute nothing.

    Args:
        selected_sources: Iterable of source names.

    Returns:
        dict mapping each filter name to its merged flag.

    Raises:
        KeyError: If a selected source defines a flag that the first
            ``SOURCE_FILTERS`` entry does not have.
    """
    template = next(iter(SOURCE_FILTERS.values()))
    merged = dict.fromkeys(template, False)
    for source in selected_sources:
        for name, enabled in SOURCE_FILTERS.get(source, {}).items():
            # Equivalent to ``merged[name] or enabled``: once a flag is
            # truthy it is never overwritten.
            if not merged[name]:
                merged[name] = enabled
    return merged
def metadata_sources(selected_sources, source_caps):
    """Return the selected sources whose capabilities set ``has_metadata``.

    Args:
        selected_sources: Iterable of source names; order and duplicates are
            preserved in the result.
        source_caps: Mapping of source name to a capability mapping. A source
            missing from it, or lacking ``"has_metadata"``, is left out.

    Returns:
        list of the source names with a truthy ``"has_metadata"`` entry.
    """
    kept = []
    for source in selected_sources:
        capabilities = source_caps.get(source, {})
        if capabilities.get("has_metadata", False):
            kept.append(source)
    return kept
def _ordered(values) -> list:
    """Return *values* as a list with a reproducible order.

    Sets have no defined iteration order, so they are sorted; any other
    iterable keeps its own order. ``None`` becomes an empty list.
    """
    if values is None:
        return []
    if isinstance(values, (set, frozenset)):
        return sorted(values)
    return list(values)


def _format_range(bounds):
    """Format a two-item range as ``"lo–hi"``, or None when it is unset."""
    return f"{bounds[0]}–{bounds[1]}" if bounds else None


def serialize_filters(filters: dict) -> dict:
    """Flatten a filter selection into a dict of scalar values.

    Collection-valued entries are joined with commas. Set-valued collections
    (such as the ``ids``/``titles`` sets built by ``parse_paper_filter``) are
    sorted first, so the same selection always serializes to the same string.
    Lists keep the caller's order.

    Args:
        filters: Mapping that may contain ``types``, ``tags``, ``sources``,
            ``paper_filter`` (a mapping with ``ids`` and ``titles``),
            ``year_range``, ``citation_range``, ``citation_weight``,
            ``include_unknown_citations`` and ``top_k``. Missing keys, and
            collections explicitly set to None, are treated as empty.

    Returns:
        dict with the same nine keys: comma-joined strings for the
        collections, ``"lo–hi"`` strings (or None) for the ranges, a float
        ``citation_weight``, the ``str()`` of ``include_unknown_citations``
        (``"None"`` when absent) and an int ``top_k``.

    Raises:
        TypeError, ValueError: If ``citation_weight`` or ``top_k`` is present
            but not convertible by ``float()`` / ``int()``.
    """
    paper_filter = filters.get("paper_filter") or {}
    # IDs come first, then titles, as one flat comma-separated string.
    paper_tokens = (
        _ordered(paper_filter.get("ids")) + _ordered(paper_filter.get("titles"))
    )
    return {
        "types": ",".join(_ordered(filters.get("types"))),
        "tags": ",".join(_ordered(filters.get("tags"))),
        "sources": ",".join(_ordered(filters.get("sources"))),
        "paper_filter": ",".join(paper_tokens),
        "year_range": _format_range(filters.get("year_range")),
        "citation_range": _format_range(filters.get("citation_range")),
        "citation_weight": float(filters.get("citation_weight", 0.0)),
        "include_unknown_citations": str(filters.get("include_unknown_citations")),
        "top_k": int(filters.get("top_k", 0)),
    }
| def parse_paper_filter(raw: str) -> dict: | |
| ids, titles = set(), set() | |
| if not raw: | |
| return {"ids": ids, "titles": titles} | |
| for token in [t.strip() for t in raw.split(",") if t.strip()]: | |
| def extract_arxiv_id(s: str) -> str | None: | |
| if not s: | |
| return None | |
| arxiv_id_re = re.compile( | |
| r'(?:arxiv\.org/(?:abs|pdf)/)?(\d{4}\.\d{4,5}|[a-z\-]+/\d{7})', | |
| re.IGNORECASE | |
| ) | |
| m = arxiv_id_re.search(s.strip()) | |
| return m.group(1) if m else None | |
| arx = extract_arxiv_id(token) | |
| if arx: | |
| ids.add(arx.lower()) | |
| else: | |
| def normalize_title(s: str) -> str: | |
| return (s or "").casefold().strip() | |
| titles.add(normalize_title(token)) | |
| return {"ids": ids, "titles": titles} | |
def json_safe(obj):
    """Recursively replace sets in *obj* with sorted lists.

    Dicts, lists and tuples are rebuilt with their contents converted.
    ``set`` and ``frozenset`` values become sorted lists. Anything else is
    returned unchanged.

    Args:
        obj: Arbitrary value, possibly nested.

    Returns:
        A structure of the same shape in which every set/frozenset has been
        replaced by a list. Tuples stay tuples and dict keys are left as-is.
    """
    if isinstance(obj, dict):
        return {key: json_safe(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [json_safe(item) for item in obj]
    if isinstance(obj, tuple):
        # Descend into tuples so a set nested inside one is converted too.
        return tuple(json_safe(item) for item in obj)
    if isinstance(obj, (set, frozenset)):
        items = [json_safe(item) for item in obj]
        try:
            return sorted(items)
        except TypeError:
            # Elements of different types (e.g. int and str) cannot be
            # compared; fall back to a deterministic type-name/repr order.
            return sorted(items, key=lambda item: (type(item).__name__, repr(item)))
    return obj