File size: 3,998 Bytes
228e882
 
 
 
 
 
 
12945f4
228e882
 
 
 
 
 
 
 
 
 
 
 
 
12945f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f6bc92
228e882
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3f383e
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import re

# Capability matrix: for each corpus, which search filters apply.
# Keys per source:
#   authors       - filter results by author name
#   types         - filter by statement type (theorem/lemma/definition, ...)
#   tags          - filter by subject tags
#   paper_filter  - restrict to specific papers (arXiv IDs or titles)
#   year          - filter by publication year
#   journal       - filter by journal/venue
#   citations     - filter/weight by citation count
# NOTE(review): every entry is expected to carry the same key set —
# active_filters() seeds its accumulator from the first entry.
SOURCE_FILTERS = {
    "Stacks Project": {
        "authors": False,
        "types": True,
        "tags": True,
        "paper_filter": False,
        "year": False,
        "journal": False,
        "citations": False,
    },
    "arXiv": {
        # arXiv is the only source with full bibliographic metadata,
        # so every filter capability is enabled.
        "authors": True,
        "types": True,
        "tags": True,
        "paper_filter": True,
        "year": True,
        "journal": True,
        "citations": True,
    },
    "ProofWiki": {
        "authors": False,
        "types": True,
        "tags": False,
        "paper_filter": False,
        "year": False,
        "journal": False,
        "citations": False,
    },
    "An Infinitely Large Napkin": {
        "authors": False,
        "types": True,
        "tags": False,
        "paper_filter": True,
        "year": False,
        "journal": False,
        "citations": False,
    },
    "CRing Project": {
        "authors": False,
        "types": True,
        "tags": False,
        "paper_filter": True,
        "year": False,
        "journal": False,
        "citations": False,
    },
    "HoTT Book": {
        "authors": False,
        "types": True,
        "tags": False,
        "paper_filter": False,
        "year": False,
        "journal": False,
        "citations": False,
    },
    "Open Logic Project": {
        "authors": False,
        "types": True,
        "tags": False,
        "paper_filter": False,
        "year": False,
        "journal": False,
        "citations": False,
    }
}

def active_filters(selected_sources):
    """Return the union of filter capabilities across the selected sources.

    Args:
        selected_sources: iterable of source names (keys of SOURCE_FILTERS;
            unknown names are ignored).

    Returns:
        dict mapping capability name -> bool; a capability is True if at
        least one selected source supports it.
    """
    # Seed every known capability as False, using the first entry's key set
    # as the template (all entries are expected to share the same keys).
    caps = {key: False for key in next(iter(SOURCE_FILTERS.values()))}
    for source in selected_sources:
        for cap, enabled in SOURCE_FILTERS.get(source, {}).items():
            # .get(cap, False) guards against a source declaring a capability
            # key absent from the template entry — the original indexed
            # caps[cap] directly and would raise KeyError there.
            caps[cap] = caps.get(cap, False) or enabled
    return caps

def metadata_sources(selected_sources, source_caps):
    """Return the selected sources whose capability entry flags metadata.

    Args:
        selected_sources: iterable of source names, in display order.
        source_caps: dict mapping source name -> capability dict; a source
            counts only if its entry has a truthy "has_metadata" flag.

    Returns:
        list of source names, preserving the order of selected_sources.
    """
    with_metadata = []
    for source in selected_sources:
        capabilities = source_caps.get(source, {})
        if capabilities.get("has_metadata", False):
            with_metadata.append(source)
    return with_metadata

def serialize_filters(filters: dict) -> dict:
    return {
        "types": ",".join(filters.get("types", [])),
        "tags": ",".join(filters.get("tags", [])),
        "sources": ",".join(filters.get("sources", [])),
        "paper_filter": ",".join(
            list(filters.get("paper_filter", {}).get("ids", [])) +
            list(filters.get("paper_filter", {}).get("titles", []))
        ),
        "year_range": (
            f"{filters['year_range'][0]}{filters['year_range'][1]}"
            if filters.get("year_range") else None
        ),
        "citation_range": (
            f"{filters['citation_range'][0]}{filters['citation_range'][1]}"
            if filters.get("citation_range") else None
        ),
        "citation_weight": float(filters.get("citation_weight", 0.0)),
        "include_unknown_citations": str(filters.get("include_unknown_citations")),
        "top_k": int(filters.get("top_k", 0)),
    }

def parse_paper_filter(raw: str) -> dict:
    ids, titles = set(), set()
    if not raw:
        return {"ids": ids, "titles": titles}
    for token in [t.strip() for t in raw.split(",") if t.strip()]:
        def extract_arxiv_id(s: str) -> str | None:
            if not s:
                return None
            arxiv_id_re = re.compile(
                r'(?:arxiv\.org/(?:abs|pdf)/)?(\d{4}\.\d{4,5}|[a-z\-]+/\d{7})',
                re.IGNORECASE
            )
            m = arxiv_id_re.search(s.strip())
            return m.group(1) if m else None

        arx = extract_arxiv_id(token)
        if arx:
            ids.add(arx.lower())
        else:
            def normalize_title(s: str) -> str:
                return (s or "").casefold().strip()

            titles.add(normalize_title(token))
    return {"ids": ids, "titles": titles}

def json_safe(obj):
    """Recursively rewrite *obj* so it is JSON-serializable.

    Sets become sorted lists (for deterministic output); dicts and lists
    are rebuilt with converted members. Anything else is returned as-is.
    """
    if isinstance(obj, set):
        # Sort after conversion so nested sets are handled first.
        return sorted(json_safe(item) for item in obj)
    if isinstance(obj, list):
        return [json_safe(item) for item in obj]
    if isinstance(obj, dict):
        return {key: json_safe(value) for key, value in obj.items()}
    return obj