razvan committed on
Commit
37098fa
·
verified ·
1 Parent(s): 3ab9f4d

Upload plugins/mlintern/skills/ml-intern-harness/scripts/papers.py with huggingface_hub

Browse files
plugins/mlintern/skills/ml-intern-harness/scripts/papers.py ADDED
@@ -0,0 +1,476 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Paper research helper for ML Intern Codex.
3
+
4
+ Emulates the useful parts of upstream ml-intern's hf_papers tool with public
5
+ HTTP APIs: Hugging Face Papers, arXiv/ar5iv HTML, and Semantic Scholar.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import html.parser
12
+ import json
13
+ import os
14
+ import re
15
+ import sys
16
+ import urllib.error
17
+ import urllib.parse
18
+ import urllib.request
19
+ from concurrent.futures import ThreadPoolExecutor, as_completed
20
+ from typing import Any
21
+
22
+
23
# Base URL of the Hugging Face Hub HTTP API (papers, models, datasets, collections).
HF_API = "https://huggingface.co/api"
# Base URL of arXiv's HTML rendering of a paper; tried first when reading a paper.
ARXIV_HTML = "https://arxiv.org/html"
# Base URL of the ar5iv HTML rendering; tried after ARXIV_HTML.
AR5IV_HTML = "https://ar5iv.labs.arxiv.org/html"
# Base URL of the Semantic Scholar API; requests to it carry S2_API_KEY when set.
S2_API = "https://api.semanticscholar.org"
# Maximum number of characters printed for a single paper section.
MAX_SECTION_TEXT_LEN = 8000
28
+
29
+
30
def request_json(url: str, params: dict[str, Any] | None = None, method: str = "GET", body: dict[str, Any] | None = None) -> Any:
    """Send an HTTP request and return the JSON-decoded response body.

    Args:
        url: Endpoint URL without a query string.
        params: Optional query parameters; entries whose value is ``None``
            are dropped before encoding.
        method: HTTP method passed to ``urllib.request.Request``.
        body: Optional payload, serialized as JSON and sent with a
            ``Content-Type: application/json`` header.

    Returns:
        The parsed JSON document.

    Raises:
        RuntimeError: If the server answers with an HTTP error status; the
            message carries the status code and the first 500 characters of
            the response body.
    """
    if params:
        query = urllib.parse.urlencode(
            {name: value for name, value in params.items() if value is not None}
        )
        url = f"{url}?{query}"

    headers = {"User-Agent": "ml-intern-codex/0.1"}
    payload = None
    if body is not None:
        payload = json.dumps(body).encode("utf-8")
        headers["Content-Type"] = "application/json"

    # The Semantic Scholar key is only attached to Semantic Scholar URLs.
    api_key = os.environ.get("S2_API_KEY")
    if api_key and url.startswith(S2_API):
        headers["x-api-key"] = api_key

    req = urllib.request.Request(url, data=payload, headers=headers, method=method)
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as exc:
        detail = exc.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"{url} returned HTTP {exc.code}: {detail[:500]}") from exc
    return json.loads(raw.decode("utf-8"))
48
+
49
+
50
def request_text(url: str) -> str:
    """Fetch *url* with a GET request and return the body decoded as UTF-8.

    Undecodable bytes are replaced rather than raising. Errors raised by
    ``urllib`` propagate to the caller unchanged.
    """
    headers = {"User-Agent": "ml-intern-codex/0.1"}
    req = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(req, timeout=30) as resp:
        raw = resp.read()
    return raw.decode("utf-8", errors="replace")
54
+
55
+
56
def arxiv_s2_id(arxiv_id: str) -> str:
    """Return *arxiv_id* prefixed with ``ARXIV:`` for use in Semantic Scholar URLs."""
    return "ARXIV:{}".format(arxiv_id)
58
+
59
+
60
def truncate(text: str, limit: int) -> str:
    """Collapse whitespace in *text* and cap it at *limit* characters.

    Runs of whitespace become a single space and the ends are stripped.
    When the result is longer than *limit*, it is cut to *limit* characters,
    trailing whitespace is removed, and ``...`` is appended.
    """
    normalized = re.sub(r"\s+", " ", text).strip()
    if len(normalized) <= limit:
        return normalized
    clipped = normalized[:limit].rstrip()
    return f"{clipped}..."
63
+
64
+
65
def paper_arxiv_id(paper: dict[str, Any]) -> str:
    """Return the arXiv id recorded on a paper record.

    Lookup order: ``externalIds``/``external_ids`` -> ``"ArXiv"``, then
    ``arxiv_id``, then ``id`` (defaulting to an empty string when absent).
    """
    external_ids = paper.get("externalIds") or paper.get("external_ids") or {}
    from_external = external_ids.get("ArXiv")
    if from_external:
        return from_external
    direct = paper.get("arxiv_id")
    if direct:
        return direct
    return paper.get("id", "")
68
+
69
+
70
def format_hf_paper(paper: dict[str, Any], idx: int) -> str:
    """Render one Hugging Face paper record as a Markdown entry.

    The record may wrap the paper fields in a nested ``"paper"`` dict; when
    it does, those nested fields are preferred. The entry contains a numbered
    heading, then (when available) the arXiv id and URL, publication date,
    GitHub URL and a summary shortened to 500 characters.
    """
    inner = paper.get("paper")
    if not isinstance(inner, dict):
        inner = paper

    title = inner.get("title") or paper.get("title") or "(untitled)"
    arxiv_id = inner.get("id") or inner.get("arxivId") or paper.get("id") or ""
    summary = inner.get("summary") or inner.get("abstract") or ""

    out = [f"### {idx}. {title}"]
    if arxiv_id:
        out += [f"arxiv_id: {arxiv_id}", f"https://arxiv.org/abs/{arxiv_id}"]
    for label, key in (("Published", "publishedAt"), ("GitHub", "githubUrl")):
        value = inner.get(key)
        if value:
            out.append(f"{label}: {value}")
    if summary:
        out.append(truncate(summary, 500))
    return "\n".join(out)
86
+
87
+
88
def format_s2_paper(paper: dict[str, Any], idx: int) -> str:
    """Render one Semantic Scholar paper record as a Markdown entry.

    The entry is a numbered heading, a ``|``-separated metadata line (year,
    citation count, optional venue and arXiv id), then the arXiv URL and
    TL;DR text when present.
    """
    heading = f"### {idx}. {paper.get('title') or '(untitled)'}"
    meta = [
        f"Year: {paper.get('year') or '?'}",
        f"Citations: {paper.get('citationCount', 0)}",
    ]
    venue = paper.get("venue") or ""
    if venue:
        meta.append(f"Venue: {venue}")
    arxiv_id = paper_arxiv_id(paper)
    if arxiv_id:
        meta.append(f"arxiv_id: {arxiv_id}")

    out = [heading, " | ".join(meta)]
    if arxiv_id:
        out.append(f"https://arxiv.org/abs/{arxiv_id}")
    tldr = (paper.get("tldr") or {}).get("text", "")
    if tldr:
        out.append(f"TL;DR: {tldr}")
    return "\n".join(out)
106
+
107
+
108
class ArxivHTMLParser(html.parser.HTMLParser):
    """Collect the title, abstract and sections of a paper rendered as HTML.

    Elements are recognized by tag name plus ``ltx_*`` CSS class:

    * ``<h1 class="...ltx_title...">`` is the paper title.
    * ``<div class="...ltx_abstract...">`` is the abstract container.
    * ``<h2>``/``<h3>`` with ``ltx_title`` start a new section.
    * every ``<p>`` is a paragraph, attached to the abstract while inside
      the abstract container and otherwise to the most recent section.

    After ``feed()``, results are in ``title_parts``, ``abstract_parts`` and
    ``sections`` (a list of ``{"id", "title", "text"}`` dicts).
    """

    def __init__(self) -> None:
        super().__init__()
        self.capture_title = False
        self.capture_abstract = False
        self.capture_heading = False
        self.capture_paragraph = False
        # Number of currently open <div> elements counted from the abstract
        # container itself; lets a nested </div> be told apart from the
        # </div> that actually closes the abstract.
        self.abstract_div_depth = 0
        self.title_parts: list[str] = []
        self.abstract_parts: list[str] = []
        self.sections: list[dict[str, Any]] = []
        self.current_heading: list[str] = []
        self.current_paragraph: list[str] = []
        self.current_section: dict[str, Any] | None = None

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Switch capture flags on according to the opening tag and its class."""
        classes = dict(attrs).get("class", "") or ""
        if tag == "h1" and "ltx_title" in classes:
            self.capture_title = True
        elif tag == "div":
            if self.capture_abstract:
                # Nested div inside the abstract: remember it so its closing
                # tag does not end abstract capture prematurely.
                self.abstract_div_depth += 1
            elif "ltx_abstract" in classes:
                self.capture_abstract = True
                self.abstract_div_depth = 1
        elif tag in {"h2", "h3"} and "ltx_title" in classes:
            self.capture_heading = True
            self.current_heading = []
        elif tag == "p":
            self.capture_paragraph = True
            self.current_paragraph = []

    def handle_endtag(self, tag: str) -> None:
        """Finish the element being captured and store its collected text."""
        if tag == "h1" and self.capture_title:
            self.capture_title = False
        elif tag == "div" and self.capture_abstract:
            self.abstract_div_depth -= 1
            if self.abstract_div_depth <= 0:
                self.capture_abstract = False
                self.abstract_div_depth = 0
        elif tag in {"h2", "h3"} and self.capture_heading:
            heading = truncate(" ".join(self.current_heading), 500)
            section_id = ""
            # A leading number such as "3", "3.1" or "A1" followed by
            # whitespace is used as the section id.
            match = re.match(r"^([A-Z]?\d+(?:\.\d+)*)\s", heading)
            if match:
                section_id = match.group(1)
            self.current_section = {"id": section_id, "title": heading, "text": ""}
            self.sections.append(self.current_section)
            self.capture_heading = False
        elif tag == "p" and self.capture_paragraph:
            paragraph = truncate(" ".join(self.current_paragraph), 4000)
            if paragraph:
                if self.capture_abstract:
                    self.abstract_parts.append(paragraph)
                elif self.current_section is not None:
                    # Paragraphs seen before the first section heading (and
                    # outside the abstract) are intentionally not stored.
                    existing = self.current_section["text"]
                    self.current_section["text"] = (existing + "\n\n" + paragraph).strip()
            self.capture_paragraph = False

    def handle_data(self, data: str) -> None:
        """Append non-blank text to every buffer that is currently capturing."""
        text = data.strip()
        if not text:
            return
        if self.capture_title:
            self.title_parts.append(text.removeprefix("Title:"))
        if self.capture_heading:
            self.current_heading.append(text)
        if self.capture_paragraph:
            self.current_paragraph.append(text)
169
+
170
+
171
def parse_arxiv_html(html_text: str) -> dict[str, Any]:
    """Parse paper HTML into ``{"title", "abstract", "sections"}``.

    The title is capped at 500 characters and the abstract at 2000;
    ``sections`` is the list of section dicts built by ``ArxivHTMLParser``.
    """
    extractor = ArxivHTMLParser()
    extractor.feed(html_text)
    title = truncate(" ".join(extractor.title_parts), 500)
    abstract = truncate(" ".join(extractor.abstract_parts), 2000)
    return {"title": title, "abstract": abstract, "sections": extractor.sections}
179
+
180
+
181
def op_trending(args: argparse.Namespace) -> str:
    """List Hugging Face daily papers, optionally filtered by ``--query``.

    With a query, three times ``args.limit`` papers are requested and only
    those whose JSON representation contains the query (case-insensitive)
    are kept; at most ``args.limit`` entries are rendered.
    """
    fetch_limit = args.limit * 3 if args.query else args.limit
    params: dict[str, Any] = {"limit": fetch_limit}
    if args.date:
        params["date"] = args.date

    papers = request_json(f"{HF_API}/daily_papers", params)
    if args.query:
        needle = args.query.lower()
        papers = [
            candidate
            for candidate in papers
            if needle in json.dumps(candidate, ensure_ascii=False).lower()
        ]

    out = ["# Trending Papers"]
    for position, paper in enumerate(papers[: args.limit], 1):
        out.extend((format_hf_paper(paper, position), ""))
    return "\n".join(out)
198
+
199
+
200
def op_search(args: argparse.Namespace) -> str:
    """Search for papers matching ``--query``.

    Hugging Face Papers search is used by default. When any date, category,
    minimum-citation or non-relevance sort option is supplied, the Semantic
    Scholar bulk search endpoint is used instead so those filters apply.

    Raises:
        SystemExit: If ``--query`` was not given.
    """
    if not args.query:
        raise SystemExit("search requires --query")

    needs_s2 = bool(
        args.date_from
        or args.date_to
        or args.categories
        or args.min_citations
        or args.sort_by != "relevance"
    )

    if not needs_s2:
        hf_papers = request_json(f"{HF_API}/papers/search", {"q": args.query, "limit": args.limit})
        out = [f"# Papers matching '{args.query}' (Hugging Face Papers)"]
        for position, paper in enumerate(hf_papers[: args.limit], 1):
            out.extend((format_hf_paper(paper, position), ""))
        return "\n".join(out)

    params: dict[str, Any] = {
        "query": args.query,
        "limit": args.limit,
        "fields": "title,externalIds,year,citationCount,tldr,venue,publicationDate",
    }
    if args.date_from or args.date_to:
        params["publicationDateOrYear"] = f"{args.date_from or ''}:{args.date_to or ''}"
    if args.categories:
        params["fieldsOfStudy"] = args.categories
    if args.min_citations:
        params["minCitationCount"] = str(args.min_citations)
    if args.sort_by != "relevance":
        params["sort"] = f"{args.sort_by}:desc"

    response = request_json(f"{S2_API}/graph/v1/paper/search/bulk", params)
    s2_papers = response.get("data", [])
    out = [f"# Papers matching '{args.query}' (Semantic Scholar)"]
    for position, paper in enumerate(s2_papers[: args.limit], 1):
        out.extend((format_s2_paper(paper, position), ""))
    return "\n".join(out)
231
+
232
+
233
def op_paper_details(args: argparse.Namespace) -> str:
    """Show Hugging Face metadata for the paper given by ``--arxiv-id``.

    Output is a Markdown heading with the title, links to the Hugging Face
    and arXiv pages, any available date/GitHub fields, and the abstract and
    AI summary when present.

    Raises:
        SystemExit: If ``--arxiv-id`` was not given.
    """
    if not args.arxiv_id:
        raise SystemExit("paper_details requires --arxiv-id")

    paper = request_json(f"{HF_API}/papers/{args.arxiv_id}")
    out = [
        f"# {paper.get('title', args.arxiv_id)}",
        f"https://huggingface.co/papers/{args.arxiv_id}",
        f"https://arxiv.org/abs/{args.arxiv_id}",
    ]
    out.extend(
        f"{key}: {paper[key]}"
        for key in ("publishedAt", "submittedOnDailyAt", "githubUrl")
        if paper.get(key)
    )

    summary = paper.get("summary")
    if summary:
        out += ["", "## Abstract", summary]
    ai_summary = paper.get("ai_summary")
    if ai_summary:
        out += ["", "## AI Summary", str(ai_summary)]
    return "\n".join(out)
250
+
251
+
252
def op_read_paper(args: argparse.Namespace) -> str:
    """Read a paper's HTML rendering: an outline, or one section's text.

    The arXiv HTML page is tried first and ar5iv second; the first one that
    yields at least one section is used. If neither does, the output falls
    back to ``op_paper_details`` plus a link to the PDF.

    Without ``--section`` the result is the title, abstract and a bullet list
    of section titles with a short preview. With ``--section`` the matching
    section's text is returned, capped at ``MAX_SECTION_TEXT_LEN``. A section
    matches when its id equals the request exactly (case-insensitive); only
    if no id matches is the request treated as a title substring.

    Raises:
        SystemExit: If ``--arxiv-id`` is missing or the section is not found.
    """
    if not args.arxiv_id:
        raise SystemExit("read_paper requires --arxiv-id")
    parsed = None
    for base in (ARXIV_HTML, AR5IV_HTML):
        try:
            parsed = parse_arxiv_html(request_text(f"{base}/{args.arxiv_id}"))
            if parsed["sections"]:
                break
        except Exception:
            # Deliberate best-effort: any fetch/parse failure just moves on
            # to the next source (and finally to the metadata fallback).
            parsed = None
    if not parsed or not parsed["sections"]:
        return op_paper_details(args) + f"\n\nHTML sections unavailable. PDF: https://arxiv.org/pdf/{args.arxiv_id}"

    sections = parsed["sections"]
    if not args.section:
        lines = [f"# {parsed['title'] or args.arxiv_id}", f"https://arxiv.org/abs/{args.arxiv_id}", "", "## Abstract", parsed["abstract"], "", "## Sections"]
        for section in sections:
            preview = truncate(section.get("text", ""), 280)
            lines.append(f"- {section['title']}: {preview}")
        return "\n".join(lines)

    wanted = args.section.lower()
    # Exact id matches win over title substrings, so "--section 3" selects
    # section "3" rather than an earlier title that merely contains a "3"
    # (for example "2.3 ...").
    selected = next((section for section in sections if section["id"].lower() == wanted), None)
    if selected is None:
        selected = next((section for section in sections if wanted in section["title"].lower()), None)
    if selected is None:
        available = "\n".join(f"- {section['title']}" for section in sections)
        raise SystemExit(f"section not found. Available sections:\n{available}")
    return "\n".join([
        f"# {selected['title']}",
        f"https://arxiv.org/abs/{args.arxiv_id}",
        "",
        truncate(selected.get("text", ""), MAX_SECTION_TEXT_LEN),
    ])
286
+
287
+
288
def format_citation(entry: dict[str, Any]) -> str:
    """Render one citation/reference entry as a Markdown bullet.

    The paper is read from ``citingPaper`` or ``citedPaper``. The bullet shows
    title, year and citation count, then optionally the arXiv id, an
    ``[influential]`` marker and the first citation context shortened to
    220 characters.
    """
    paper = entry.get("citingPaper") or entry.get("citedPaper") or {}
    title = paper.get("title") or "(untitled)"
    year = paper.get("year") or "?"
    cites = paper.get("citationCount", 0)

    pieces = [f"- {title} ({year}, {cites} cites)"]
    arxiv_id = paper_arxiv_id(paper)
    if arxiv_id:
        pieces.append(f" arxiv:{arxiv_id}")
    if entry.get("isInfluential"):
        pieces.append(" [influential]")
    contexts = entry.get("contexts") or []
    if contexts:
        pieces.append(f"\n > {truncate(contexts[0], 220)}")
    return "".join(pieces)
303
+
304
+
305
def op_citation_graph(args: argparse.Namespace) -> str:
    """List references and/or citations of a paper via Semantic Scholar.

    ``--direction`` selects ``references``, ``citations`` or ``both``. The
    requests run concurrently, but the sections are always emitted in a
    fixed order (References, then Citations) so the output does not depend
    on which request finishes first. A failed request is reported inside its
    own section rather than aborting the whole command.

    Raises:
        SystemExit: If ``--arxiv-id`` was not given.
    """
    if not args.arxiv_id:
        raise SystemExit("citation_graph requires --arxiv-id")
    fields = "title,externalIds,year,citationCount,influentialCitationCount,contexts,intents,isInfluential"
    params = {"fields": fields, "limit": args.limit}
    paths: dict[str, str] = {}
    if args.direction in {"references", "both"}:
        paths["References"] = f"/graph/v1/paper/{arxiv_s2_id(args.arxiv_id)}/references"
    if args.direction in {"citations", "both"}:
        paths["Citations"] = f"/graph/v1/paper/{arxiv_s2_id(args.arxiv_id)}/citations"
    lines = [f"# Citation Graph for {args.arxiv_id}", f"https://arxiv.org/abs/{args.arxiv_id}"]
    with ThreadPoolExecutor(max_workers=2) as pool:
        # Submit everything first so the requests overlap, then read the
        # results in submission order for deterministic output.
        futures = {
            name: pool.submit(request_json, f"{S2_API}{path}", params)
            for name, path in paths.items()
        }
        for name, future in futures.items():
            lines.append("")
            lines.append(f"## {name}")
            try:
                data = future.result()
                # "data" may be missing or null; treat both as no entries.
                for entry in data.get("data") or []:
                    lines.append(format_citation(entry))
            except Exception as exc:
                lines.append(f"Error: {exc}")
    return "\n".join(lines)
329
+
330
+
331
def op_resources(args: argparse.Namespace) -> str:
    """List Hugging Face datasets, models and/or collections linked to a paper.

    Serves the ``find_datasets``, ``find_models``, ``find_collections`` and
    ``find_all_resources`` operations; ``args.operation`` decides which
    endpoints are queried. Requests run concurrently, but sections are always
    emitted in a fixed order (Datasets, Models, Collections) so the output
    does not depend on which request finishes first. A failed request is
    reported inside its own section.

    Raises:
        SystemExit: If ``--arxiv-id`` was not given.
    """
    if not args.arxiv_id:
        raise SystemExit(f"{args.operation} requires --arxiv-id")
    # Map the CLI sort choice to the sort key sent to the Hub API.
    sort = {"downloads": "downloads", "likes": "likes", "trending": "trendingScore"}[args.sort]
    calls: dict[str, tuple[str, dict[str, Any]]] = {}
    if args.operation in {"find_datasets", "find_all_resources"}:
        calls["Datasets"] = (f"{HF_API}/datasets", {"filter": f"arxiv:{args.arxiv_id}", "limit": args.limit, "sort": sort, "direction": -1})
    if args.operation in {"find_models", "find_all_resources"}:
        calls["Models"] = (f"{HF_API}/models", {"filter": f"arxiv:{args.arxiv_id}", "limit": args.limit, "sort": sort, "direction": -1})
    if args.operation in {"find_collections", "find_all_resources"}:
        calls["Collections"] = (f"{HF_API}/collections", {"paper": args.arxiv_id})
    lines = [f"# Resources linked to paper {args.arxiv_id}", f"https://huggingface.co/papers/{args.arxiv_id}"]
    with ThreadPoolExecutor(max_workers=3) as pool:
        # Submit everything first so the requests overlap, then read the
        # results in submission order for deterministic output.
        futures = {
            name: pool.submit(request_json, url, params)
            for name, (url, params) in calls.items()
        }
        for name, future in futures.items():
            lines.append("")
            lines.append(f"## {name}")
            try:
                items = future.result()
                for item in items[: args.limit]:
                    repo_id = item.get("id") or item.get("modelId") or item.get("slug") or item.get("title")
                    likes = item.get("likes")
                    downloads = item.get("downloads")
                    meta = []
                    if downloads is not None:
                        meta.append(f"downloads={downloads}")
                    if likes is not None:
                        meta.append(f"likes={likes}")
                    lines.append(f"- {repo_id}" + (f" ({', '.join(meta)})" if meta else ""))
            except Exception as exc:
                lines.append(f"Error: {exc}")
    return "\n".join(lines)
364
+
365
+
366
def op_snippet_search(args: argparse.Namespace) -> str:
    """Search Semantic Scholar for text snippets matching ``--query``.

    Optional date, category and minimum-citation filters are forwarded. Each
    hit is rendered as a numbered heading with the paper title, followed by
    the arXiv id, snippet section and snippet text (capped at 400
    characters) when available.

    Raises:
        SystemExit: If ``--query`` was not given.
    """
    if not args.query:
        raise SystemExit("snippet_search requires --query")

    params: dict[str, Any] = {
        "query": args.query,
        "limit": args.limit,
        "fields": "title,externalIds,year,citationCount",
    }
    if args.date_from or args.date_to:
        params["publicationDateOrYear"] = f"{args.date_from or ''}:{args.date_to or ''}"
    if args.categories:
        params["fieldsOfStudy"] = args.categories
    if args.min_citations:
        params["minCitationCount"] = str(args.min_citations)

    response = request_json(f"{S2_API}/graph/v1/snippet/search", params)
    hits = response.get("data", [])[: args.limit]

    out = [f"# Snippet Search: {args.query}"]
    for position, hit in enumerate(hits, 1):
        paper = hit.get("paper") or {}
        snippet = hit.get("snippet") or {}
        out.append(f"### {position}. {paper.get('title', '(untitled)')}")
        arxiv_id = paper_arxiv_id(paper)
        if arxiv_id:
            out.append(f"arxiv:{arxiv_id}")
        section = snippet.get("section")
        if section:
            out.append(f"Section: {section}")
        text = snippet.get("text")
        if text:
            out.append(f"> {truncate(text, 400)}")
        out.append("")
    return "\n".join(out)
391
+
392
+
393
def op_recommend(args: argparse.Namespace) -> str:
    """Recommend papers via the Semantic Scholar recommendations API.

    With ``--arxiv-id`` the single-paper endpoint is queried (``from=recent``).
    Otherwise the comma-separated ``--positive-ids`` / ``--negative-ids`` are
    posted to the multi-paper endpoint.

    Raises:
        SystemExit: If neither ``--arxiv-id`` nor ``--positive-ids`` was given.
    """
    if not (args.arxiv_id or args.positive_ids):
        raise SystemExit("recommend requires --arxiv-id or --positive-ids")

    fields = "title,externalIds,year,citationCount,tldr,venue"
    query = {"fields": fields, "limit": args.limit}

    def to_s2_ids(csv: str) -> list[str]:
        # Split a comma-separated list, dropping blank entries.
        return [arxiv_s2_id(part.strip()) for part in csv.split(",") if part.strip()]

    if args.arxiv_id:
        data = request_json(
            f"{S2_API}/recommendations/v1/papers/forpaper/{arxiv_s2_id(args.arxiv_id)}",
            {**query, "from": "recent"},
        )
    else:
        positive = to_s2_ids(args.positive_ids)
        negative = to_s2_ids(args.negative_ids)
        data = request_json(
            f"{S2_API}/recommendations/v1/papers/",
            query,
            method="POST",
            body={"positivePaperIds": positive, "negativePaperIds": negative},
        )

    recommended = data.get("recommendedPapers", [])
    out = ["# Recommended Papers"]
    for position, paper in enumerate(recommended[: args.limit], 1):
        out.extend((format_s2_paper(paper, position), ""))
    return "\n".join(out)
417
+
418
+
419
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser: one positional operation plus options.

    The module docstring is used as the parser description.
    """
    operations = [
        "trending",
        "search",
        "paper_details",
        "read_paper",
        "citation_graph",
        "snippet_search",
        "recommend",
        "find_datasets",
        "find_models",
        "find_collections",
        "find_all_resources",
    ]
    parser = argparse.ArgumentParser(description=__doc__)
    add = parser.add_argument
    add("operation", choices=operations)
    add("--query")
    add("--arxiv-id")
    add("--section")
    add("--direction", choices=["citations", "references", "both"], default="both")
    add("--date")
    add("--date-from", default="")
    add("--date-to", default="")
    add("--categories")
    add("--min-citations", type=int)
    add("--sort-by", choices=["relevance", "citationCount", "publicationDate"], default="relevance")
    add("--positive-ids", default="")
    add("--negative-ids", default="")
    add("--sort", choices=["downloads", "likes", "trending"], default="downloads")
    add("--limit", type=int, default=10)
    return parser
449
+
450
+
451
def main() -> int:
    """Parse the command line, run the chosen operation and print its output.

    ``--limit`` is clamped to the range 1..50 before dispatch.

    Returns:
        ``0`` on success, ``1`` when the operation raised an exception (the
        error is printed to stderr).
    """
    args = build_parser().parse_args()
    args.limit = max(1, min(args.limit, 50))

    handlers = {
        "trending": op_trending,
        "search": op_search,
        "paper_details": op_paper_details,
        "read_paper": op_read_paper,
        "citation_graph": op_citation_graph,
        "snippet_search": op_snippet_search,
        "recommend": op_recommend,
    }
    # All resource lookups share a single handler.
    for operation in ("find_datasets", "find_models", "find_collections", "find_all_resources"):
        handlers[operation] = op_resources

    try:
        output = handlers[args.operation](args)
        print(output)
    except Exception as exc:
        print(f"Error running papers {args.operation}: {exc}", file=sys.stderr)
        return 1
    return 0
473
+
474
+
475
# Script entry point: run the CLI and use main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())