# NOTE: removed non-Python scrape/export artifacts that preceded this module
# (web UI chrome, file-size line, commit hashes, line-number gutter) — they
# were not valid Python and prevented the file from importing.
import re
# Per-source filter capabilities. Each source maps every capability name to a
# bool: True means the corresponding UI filter applies to that source. Built
# from the set of *enabled* capabilities so the default (False) is implicit.
SOURCE_FILTERS = {
    source: {
        capability: capability in enabled
        for capability in (
            "authors",
            "types",
            "tags",
            "paper_filter",
            "year",
            "journal",
            "citations",
        )
    }
    for source, enabled in {
        "Stacks Project": {"types", "tags"},
        "arXiv": {
            "authors", "types", "tags", "paper_filter",
            "year", "journal", "citations",
        },
        "ProofWiki": {"types"},
        "An Infinitely Large Napkin": {"types", "paper_filter"},
        "CRing Project": {"types", "paper_filter"},
        "HoTT Book": {"types"},
        "Open Logic Project": {"types"},
    }.items()
}
def active_filters(selected_sources):
    """Return the union of filter capabilities across the selected sources.

    Args:
        selected_sources: Iterable of source names (keys of SOURCE_FILTERS);
            unknown names are silently ignored.

    Returns:
        dict mapping each capability name to True if ANY selected source
        supports it, else False. With no selected sources, all values are
        False.
    """
    # Seed with every capability name, defaulting to "not supported".
    # (Template keys come from the first SOURCE_FILTERS entry.)
    caps = dict.fromkeys(next(iter(SOURCE_FILTERS.values())), False)
    for source in selected_sources:
        for cap, enabled in SOURCE_FILTERS.get(source, {}).items():
            # .get(..., False) instead of caps[cap]: the original raised
            # KeyError if a source declared a capability missing from the
            # template entry; now such keys are simply added.
            caps[cap] = caps.get(cap, False) or enabled
    return caps
def metadata_sources(selected_sources, source_caps):
    """Return the selected sources whose capability entry has metadata.

    Args:
        selected_sources: Iterable of source names, in display order.
        source_caps: Mapping from source name to a capability dict; a source
            qualifies when its dict has a truthy "has_metadata" entry.

    Returns:
        list of qualifying source names, preserving input order.
    """
    with_metadata = []
    for source in selected_sources:
        capabilities = source_caps.get(source, {})
        if capabilities.get("has_metadata", False):
            with_metadata.append(source)
    return with_metadata
def serialize_filters(filters: dict) -> dict:
    """Flatten a structured filter dict into primitive, logging-friendly values.

    List-valued fields become comma-joined strings, ranges become
    "lo–hi" strings (or None when absent), and numeric/boolean fields
    are coerced to float/int/str respectively. Missing keys fall back
    to empty/zero defaults.
    """
    def joined(key: str) -> str:
        # Comma-join a possibly-missing list-valued field.
        return ",".join(filters.get(key, []))

    paper = filters.get("paper_filter", {})
    years = filters.get("year_range")
    cites = filters.get("citation_range")

    return {
        "types": joined("types"),
        "tags": joined("tags"),
        "sources": joined("sources"),
        # arXiv ids first, then titles, as one comma-joined string.
        "paper_filter": ",".join(
            list(paper.get("ids", [])) + list(paper.get("titles", []))
        ),
        "year_range": f"{years[0]}–{years[1]}" if years else None,
        "citation_range": f"{cites[0]}–{cites[1]}" if cites else None,
        "citation_weight": float(filters.get("citation_weight", 0.0)),
        # str() so None serializes as the string "None", matching prior output.
        "include_unknown_citations": str(filters.get("include_unknown_citations")),
        "top_k": int(filters.get("top_k", 0)),
    }
def parse_paper_filter(raw: str) -> dict:
    """Parse a comma-separated paper filter string into ids and titles.

    Each comma-separated token is classified: if it contains an arXiv
    identifier (new-style ``YYMM.NNNNN`` or old-style ``archive/NNNNNNN``,
    optionally prefixed by an arxiv.org/abs or /pdf URL) it is collected as a
    lowercased id; otherwise the whole token is treated as a paper title,
    normalized with casefold().

    Args:
        raw: Comma-separated user input; falsy input yields empty sets.

    Returns:
        {"ids": set[str], "titles": set[str]}
    """
    ids: set = set()
    titles: set = set()
    if not raw:
        return {"ids": ids, "titles": titles}
    # Hoisted out of the loop: the original re-defined the helper functions
    # and recompiled this regex for every token.
    arxiv_id_re = re.compile(
        r'(?:arxiv\.org/(?:abs|pdf)/)?(\d{4}\.\d{4,5}|[a-z\-]+/\d{7})',
        re.IGNORECASE
    )
    for token in (t.strip() for t in raw.split(",") if t.strip()):
        match = arxiv_id_re.search(token)
        if match:
            ids.add(match.group(1).lower())
        else:
            titles.add(token.casefold().strip())
    return {"ids": ids, "titles": titles}
def json_safe(obj):
    """Recursively convert *obj* into JSON-serializable containers.

    Sets become sorted lists; dicts and lists are rebuilt with converted
    values; every other value is returned unchanged.
    """
    # The three container types are disjoint, so check order is irrelevant.
    if isinstance(obj, set):
        return sorted(json_safe(member) for member in obj)
    if isinstance(obj, list):
        return [json_safe(member) for member in obj]
    if isinstance(obj, dict):
        return {key: json_safe(value) for key, value in obj.items()}
    return obj