evalstate committed on
Commit
8dd9efe
·
1 Parent(s): 376676a

Update Monty paper helpers and prompt surface

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. _monty_codegen_shared.md +203 -54
  2. hf-hub-query.md +18 -10
  3. monty_api/__pycache__/__init__.cpython-313.pyc +0 -0
  4. monty_api/__pycache__/__init__.cpython-314.pyc +0 -0
  5. monty_api/__pycache__/aliases.cpython-313.pyc +0 -0
  6. monty_api/__pycache__/aliases.cpython-314.pyc +0 -0
  7. monty_api/__pycache__/constants.cpython-313.pyc +0 -0
  8. monty_api/__pycache__/constants.cpython-314.pyc +0 -0
  9. monty_api/__pycache__/context_types.cpython-313.pyc +0 -0
  10. monty_api/__pycache__/context_types.cpython-314.pyc +0 -0
  11. monty_api/__pycache__/helper_contracts.cpython-313.pyc +0 -0
  12. monty_api/__pycache__/helper_contracts.cpython-314.pyc +0 -0
  13. monty_api/__pycache__/http_runtime.cpython-313.pyc +0 -0
  14. monty_api/__pycache__/http_runtime.cpython-314.pyc +0 -0
  15. monty_api/__pycache__/query_entrypoints.cpython-313.pyc +0 -0
  16. monty_api/__pycache__/query_entrypoints.cpython-314.pyc +0 -0
  17. monty_api/__pycache__/registry.cpython-313.pyc +0 -0
  18. monty_api/__pycache__/registry.cpython-314.pyc +0 -0
  19. monty_api/__pycache__/runtime_context.cpython-313.pyc +0 -0
  20. monty_api/__pycache__/runtime_context.cpython-314.pyc +0 -0
  21. monty_api/__pycache__/runtime_envelopes.cpython-313.pyc +0 -0
  22. monty_api/__pycache__/runtime_envelopes.cpython-314.pyc +0 -0
  23. monty_api/__pycache__/runtime_filtering.cpython-313.pyc +0 -0
  24. monty_api/__pycache__/runtime_filtering.cpython-314.pyc +0 -0
  25. monty_api/__pycache__/tool_entrypoints.cpython-313.pyc +0 -0
  26. monty_api/__pycache__/tool_entrypoints.cpython-314.pyc +0 -0
  27. monty_api/__pycache__/validation.cpython-313.pyc +0 -0
  28. monty_api/__pycache__/validation.cpython-314.pyc +0 -0
  29. monty_api/constants.py +9 -7
  30. monty_api/helper_contracts.py +32 -5
  31. monty_api/helpers/__init__.py +2 -0
  32. monty_api/helpers/__pycache__/__init__.cpython-313.pyc +0 -0
  33. monty_api/helpers/__pycache__/__init__.cpython-314.pyc +0 -0
  34. monty_api/helpers/__pycache__/activity.cpython-313.pyc +0 -0
  35. monty_api/helpers/__pycache__/activity.cpython-314.pyc +0 -0
  36. monty_api/helpers/__pycache__/collections.cpython-313.pyc +0 -0
  37. monty_api/helpers/__pycache__/collections.cpython-314.pyc +0 -0
  38. monty_api/helpers/__pycache__/common.cpython-313.pyc +0 -0
  39. monty_api/helpers/__pycache__/common.cpython-314.pyc +0 -0
  40. monty_api/helpers/__pycache__/introspection.cpython-313.pyc +0 -0
  41. monty_api/helpers/__pycache__/introspection.cpython-314.pyc +0 -0
  42. monty_api/helpers/__pycache__/profiles.cpython-313.pyc +0 -0
  43. monty_api/helpers/__pycache__/profiles.cpython-314.pyc +0 -0
  44. monty_api/helpers/__pycache__/repos.cpython-313.pyc +0 -0
  45. monty_api/helpers/__pycache__/repos.cpython-314.pyc +0 -0
  46. monty_api/helpers/introspection.py +4 -2
  47. monty_api/helpers/papers.py +318 -0
  48. monty_api/helpers/profiles.py +18 -8
  49. monty_api/helpers/repos.py +0 -58
  50. monty_api/http_runtime.py +0 -41
_monty_codegen_shared.md CHANGED
@@ -3,24 +3,31 @@
3
  - You are writing Python to be executed in a secure runtime environment.
4
  - **NEVER** use `import` - it is NOT available in this environment.
5
  - All helper calls are async: always use `await`.
6
- - Use this exact outer shape:
7
 
8
  ```py
9
- async def solve(query, max_calls):
10
- ...
11
-
12
- await solve(query, max_calls)
13
  ```
14
 
 
15
  - `max_calls` is the total external-call budget for the whole program.
 
 
 
 
16
  - Use only documented `hf_*` helpers.
17
- - Return plain Python data only: `dict`, `list`, `str`, `int`, `float`, `bool`, or `None`.
18
- - Do **not** hand-build JSON strings or markdown strings inside `solve(...)` unless the user explicitly asked for prose.
19
- - Do **not** build your own transport wrapper like `{result: ..., meta: ...}`.
20
- - If the user says "return only" some fields, return exactly that final shape.
21
- - If a helper already returns the requested row shape, return `resp["items"]` directly **only when helper coverage is clearly complete**. If helper `meta` suggests partial/unknown coverage, return `{"results": resp["items"], "coverage": resp["meta"]}` instead of bare items.
22
  - For current-user prompts (`my`, `me`), try helpers with `username=None` / `handle=None` first.
23
- - If a current-user helper returns `ok=false`, return that helper response directly.
 
 
 
 
24
 
25
  ## Search rules
26
 
@@ -41,35 +48,81 @@ await solve(query, max_calls)
41
  - `hf_user_likes(...)` already returns full normalized like rows by default; omit `fields` unless the user asked for a subset.
42
  - When sorting `hf_user_likes(...)` by `repo_likes` or `repo_downloads`, set `ranking_window=50` unless the user explicitly asked for a narrower recent window.
43
  - For human-facing follower/member/liker lists without an explicit requested count, prefer `limit=100` and return coverage when more may exist.
 
44
  - Unknown `fields` / `where` keys now fail fast. Use only canonical field names.
45
-
46
  - Ownership phrasing like "what collections does Qwen have", "collections by Qwen", or "collections owned by Qwen" means an owner lookup, so use `hf_collections_search(owner="Qwen")`, not a keyword-only `query="Qwen"` search.
 
47
  - Ownership phrasing like "what spaces does X have", "what models does X have", or "what datasets does X have" means an author/owner inventory lookup, so use `hf_spaces_search(author="X")`, `hf_models_search(author="X")`, or `hf_datasets_search(author="X")` rather than a global keyword-only search.
48
- - Owner/user/org handles may arrive with different casing in the user message; when a handle spelling is uncertain, prefer owner-oriented logic and, if needed, add fallback inside `solve(...)` that broadens to `query=...` and filters owners case-insensitively.
 
 
 
 
49
  - For exact aggregate counts like "how many models/datasets/spaces does X have", prefer `hf_profile_summary(...)['item']` counts. Those overview-owned counts may differ slightly from visible public search/list results, so if the user also asked for the list, preserve that distinction.
50
  - For owner inventory queries without an explicit requested count, use `hf_profile_summary(...)` first when a specific owner is known. If the count is modest, use it to size the follow-up list call; otherwise return a bounded list plus coverage instead of pretending completeness.
51
  - Think like `huggingface_hub`: `search`, `filter`, `author`, repo-type-specific upstream params, then `fields`.
52
  - Push constraints upstream whenever a first-class helper argument exists.
53
  - `post_filter` is only for normalized row filters that cannot be pushed upstream.
 
54
  - Keep `post_filter` simple:
55
  - exact match or `in` for returned fields like `runtime_stage`
56
  - `gte` / `lte` for normalized numeric fields like `num_params`, `downloads`, and `likes`
 
57
  - `num_params` is one of the main valid reasons to use `post_filter` on model search today.
58
  - Do **not** use `post_filter` for things that already have first-class upstream params like `author`, `pipeline_tag`, `dataset_name`, `language`, `models`, or `datasets`.
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  Examples:
61
 
62
  ```py
63
- await hf_models_search(pipeline_tag="text-to-image", limit=10)
64
- await hf_datasets_search(search="speech", sort="downloads", limit=10)
65
- await hf_spaces_search(post_filter={"runtime_stage": {"in": ["BUILD_ERROR", "RUNTIME_ERROR"]}})
66
- await hf_models_search(
 
 
67
  pipeline_tag="text-generation",
68
  sort="trending_score",
69
  limit=50,
70
  post_filter={"num_params": {"gte": 20_000_000_000, "lte": 80_000_000_000}},
71
  )
72
- await hf_collections_search(owner="Qwen", limit=10)
 
 
 
 
 
73
  ```
74
 
75
  Field-only pattern:
@@ -80,7 +133,8 @@ resp = await hf_models_search(
80
  fields=["repo_id", "author", "likes", "downloads", "repo_url"],
81
  limit=3,
82
  )
83
- return resp["items"]
 
84
  ```
85
 
86
  Coverage pattern:
@@ -93,7 +147,8 @@ resp = await hf_user_likes(
93
  limit=20,
94
  fields=["repo_id", "repo_likes", "repo_url"],
95
  )
96
- return {"results": resp["items"], "coverage": resp["meta"]}
 
97
  ```
98
 
99
  Owner-inventory pattern:
@@ -109,33 +164,64 @@ resp = await hf_spaces_search(
109
  )
110
  meta = resp.get("meta") or {}
111
  if meta.get("limit_boundary_hit") or meta.get("more_available") not in {False, None}:
112
- return {"results": resp["items"], "coverage": {**meta, "profile_spaces_count": count}}
113
- return resp["items"]
 
 
114
  ```
115
 
116
- Profile-count pattern:
117
 
118
  ```py
119
- profile = await hf_profile_summary(handle="mishig")
120
- item = profile["item"] or {}
121
- return {
122
- "followers_count": item.get("followers_count"),
123
- "following_count": item.get("following_count"),
124
- }
125
- ```
126
-
127
- Pro-followers pattern:
128
-
129
- ```py
130
- followers = await hf_user_graph(
131
  relation="followers",
132
- pro_only=True,
133
- limit=20,
134
- fields=["username"],
135
  )
136
- return followers["items"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  ```
138
 
 
 
 
 
 
139
  ## Navigation graph
140
 
141
  Use the helper that matches the question type.
@@ -146,11 +232,14 @@ Use the helper that matches the question type.
146
  - space search/list/discovery → `hf_spaces_search(...)`
147
  - cross-type repo search → `hf_repo_search(...)`
148
  - trending repos → `hf_trending(...)`
149
- - daily papers → `hf_daily_papers(...)`
 
 
 
150
  - repo discussions → `hf_repo_discussions(...)`
151
  - specific discussion details → `hf_repo_discussion_details(...)`
152
  - users who liked one repo → `hf_repo_likers(...)`
153
- - profile / overview / aggregate counts → `hf_profile_summary(...)`
154
  - followers / following lists → `hf_user_graph(...)`
155
  - repos a user liked → `hf_user_likes(...)`
156
  - recent activity feed → `hf_recent_activity(...)`
@@ -182,16 +271,12 @@ Rules:
182
  - `items` is the canonical list field.
183
  - `item` is just a singleton convenience.
184
  - `meta` contains helper-owned execution, limit, and coverage info.
185
- - When helper-owned coverage matters, prefer returning the helper envelope directly.
186
 
187
  ## High-signal output rules
188
 
189
  - Prefer compact dict/list outputs over prose when the user asked for fields.
190
- - Prefer summary helpers before detail hydration.
191
  - Use canonical snake_case keys in generated code and structured output.
192
  - Use `repo_id` as the display label for repos.
193
- - Use `hf_profile_summary(...)['item']` for aggregate counts such as followers, following, models, datasets, and spaces.
194
- - For selective one-shot search helpers, treat `meta.limit_boundary_hit=true` as a partial/unknown-coverage warning even if `meta.truncated` is still `false`.
195
  - For joins/intersections/rankings, fetch the needed working set first and compute locally.
196
  - If the result is partial, use top-level keys `results` and `coverage`.
197
 
@@ -205,7 +290,7 @@ await hf_collection_items(collection_id: 'str', repo_types: 'list[str] | None' =
205
 
206
  await hf_collections_search(query: 'str | None' = None, owner: 'str | None' = None, limit: 'int' = 20, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
207
 
208
- await hf_daily_papers(limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
209
 
210
  await hf_datasets_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, benchmark: 'str | bool | None' = None, dataset_name: 'str | None' = None, gated: 'bool | None' = None, language_creators: 'str | list[str] | None' = None, language: 'str | list[str] | None' = None, multilinguality: 'str | list[str] | None' = None, size_categories: 'str | list[str] | None' = None, task_categories: 'str | list[str] | None' = None, task_ids: 'str | list[str] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
211
 
@@ -213,8 +298,14 @@ await hf_models_search(search: 'str | None' = None, filter: 'str | list[str] | N
213
 
214
  await hf_org_members(organization: 'str', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
215
 
 
 
 
 
216
  await hf_profile_summary(handle: 'str | None' = None, include: 'list[str] | None' = None, likes_limit: 'int' = 10, activity_limit: 'int' = 10) -> 'dict[str, Any]'
217
 
 
 
218
  await hf_recent_activity(feed_type: 'str | None' = None, entity: 'str | None' = None, activity_types: 'list[str] | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, max_pages: 'int | None' = None, start_cursor: 'str | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
219
 
220
  await hf_repo_details(repo_id: 'str | None' = None, repo_ids: 'list[str] | None' = None, repo_type: 'str' = 'auto', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
@@ -296,24 +387,27 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
296
  ### hf_daily_papers
297
 
298
  - category: `curated_feed`
 
299
  - returns:
300
  - envelope: `{ok, item, items, meta, error}`
301
- - row_type: `daily_paper`
302
- - default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
303
- - guaranteed_fields: `paper_id`, `title`, `published_at`, `rank`
304
- - optional_fields: `summary`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`
305
- - supported_params: `limit`, `where`, `fields`
 
 
306
  - fields_contract:
307
- - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
308
  - canonical_only: `true`
309
  - where_contract:
310
- - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
311
  - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
312
  - normalized_only: `true`
313
  - limit_contract:
314
  - default_limit: `20`
315
  - max_limit: `500`
316
- - notes: Returns daily paper summary rows. repo_id is omitted unless the upstream payload provides it.
317
 
318
  ### hf_datasets_search
319
 
@@ -388,6 +482,45 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
388
  - scan_max: `10000`
389
  - notes: Returns organization member summary rows.
390
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
  ### hf_profile_summary
392
 
393
  - category: `profile_summary`
@@ -402,6 +535,22 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
402
  - include: `likes`, `activity`
403
  - notes: Profile summary helper. Aggregate counts like followers_count/following_count are in the base item. include=['likes', 'activity'] adds composed samples and extra upstream work; no other include values are supported. Overview-owned repo counts may differ slightly from visible public search/list results.
404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  ### hf_recent_activity
406
 
407
  - category: `activity_feed`
 
3
  - You are writing Python to be executed in a secure runtime environment.
4
  - **NEVER** use `import` - it is NOT available in this environment.
5
  - All helper calls are async: always use `await`.
6
+ - Write a top-level Monty Python script. Use a shape like:
7
 
8
  ```py
9
+ resp = await hf_models_search(limit=min(max_calls, 10))
10
+ result = resp["items"]
11
+ result
 
12
  ```
13
 
14
+ - `max_calls` is a runtime-provided top-level input.
15
  - `max_calls` is the total external-call budget for the whole program.
16
+ - Always assign the final output to `result`.
17
+ - End the script with a final line containing only `result`.
18
+ - Never stop after `result = ...`; always add a final bare `result` line.
19
+ - Do **not** define or call `solve(...)`.
20
  - Use only documented `hf_*` helpers.
21
+ - `result` must be plain Python data only: `dict`, `list`, `str`, `int`, `float`, `bool`, or `None`.
22
+ - Do **not** hand-build JSON strings or markdown strings unless the user explicitly asked for prose, and do **not** build your own transport wrapper like `{result: ..., meta: ...}`.
23
+ - If the user says "return only" some fields, make `result` exactly that shape.
24
+ - If a helper already returns the requested row shape, use `resp["items"]` directly **only when helper coverage is clearly complete**. If helper `meta` suggests partial/unknown coverage, set `result = {"results": resp["items"], "coverage": resp["meta"]}` instead of bare items.
 
25
  - For current-user prompts (`my`, `me`), try helpers with `username=None` / `handle=None` first.
26
+ - For current-user follower/following aggregation prompts, prefer `hf_user_graph(relation=..., ...)` directly instead of `hf_whoami()` plus a second graph call. This saves a call and avoids unnecessary branching.
27
+ - If a current-user helper returns `ok=false`, assign that helper response to `result`.
28
+ - For relationship / aggregation questions (followers, members, likes, likers, intersections), preserve attribution in `result` unless the user explicitly asked for a collapsed deduped list.
29
+ - Do **not** choose tiny hard-coded limits like `5` for follower/member/likes aggregation unless the user explicitly asked for a tiny sample. Prefer larger limits and preserve coverage when partial.
30
+ - If you branch on an error path, you must still end the module with a final top-level bare `result` line outside every `if` / loop.
31
 
32
  ## Search rules
33
 
 
48
  - `hf_user_likes(...)` already returns full normalized like rows by default; omit `fields` unless the user asked for a subset.
49
  - When sorting `hf_user_likes(...)` by `repo_likes` or `repo_downloads`, set `ranking_window=50` unless the user explicitly asked for a narrower recent window.
50
  - For human-facing follower/member/liker lists without an explicit requested count, prefer `limit=100` and return coverage when more may exist.
51
+ - For follower/following/member/liker queries that require local filtering on actor fields such as `username` or `fullname`, prefer a bounded scan like `limit=100` / `scan_limit=100` by default, or at most about `200` when a slightly broader sample is justified. Do **not** jump to `1000` unless the user explicitly asked for exhaustive coverage or a very large sample.
52
  - Unknown `fields` / `where` keys now fail fast. Use only canonical field names.
 
53
  - Ownership phrasing like "what collections does Qwen have", "collections by Qwen", or "collections owned by Qwen" means an owner lookup, so use `hf_collections_search(owner="Qwen")`, not a keyword-only `query="Qwen"` search.
54
+ - `hf_collections_search(owner=...)` filters owners case-insensitively, so pass the user-provided owner spelling directly via the `owner` argument.
55
  - Ownership phrasing like "what spaces does X have", "what models does X have", or "what datasets does X have" means an author/owner inventory lookup, so use `hf_spaces_search(author="X")`, `hf_models_search(author="X")`, or `hf_datasets_search(author="X")` rather than a global keyword-only search.
56
+ - For paper discovery, use `hf_papers_search(...)` for search, `hf_daily_papers(...)` for the curated daily feed, `hf_paper_info(...)` for exact metadata, and `hf_read_paper(...)` for markdown content.
57
+ - The main Hub-native join points on paper rows are `organization`, `submitted_by`, and `author_usernames`. Papers do not expose first-class model/dataset/space repo IDs.
58
+ - For profile/detail/social questions about a user or org — bio, description, display name, website, GitHub, Twitter/X, LinkedIn, Bluesky, organizations, or pro status — use `hf_profile_summary(...)` first.
59
+ - For join-style questions that need profile details for followers, following, members, likers, or other actor lists, first fetch a **bounded** actor list, filter locally on actor fields like `username` / `fullname`, then hydrate only the bounded matches with `hf_profile_summary(...)`.
60
+ - Do **not** set the initial actor-list limit equal to the whole remaining call budget when each match needs a follow-up profile lookup; reserve budget for the profile-detail calls and return coverage if the hydration step is partial.
61
  - For exact aggregate counts like "how many models/datasets/spaces does X have", prefer `hf_profile_summary(...)['item']` counts. Those overview-owned counts may differ slightly from visible public search/list results, so if the user also asked for the list, preserve that distinction.
62
  - For owner inventory queries without an explicit requested count, use `hf_profile_summary(...)` first when a specific owner is known. If the count is modest, use it to size the follow-up list call; otherwise return a bounded list plus coverage instead of pretending completeness.
63
  - Think like `huggingface_hub`: `search`, `filter`, `author`, repo-type-specific upstream params, then `fields`.
64
  - Push constraints upstream whenever a first-class helper argument exists.
65
  - `post_filter` is only for normalized row filters that cannot be pushed upstream.
66
+ - For created/updated date constraints, pair local `post_filter` with the matching sort (`created_at` or `last_modified`). Do **not** rely on date-only `post_filter` over an unsorted repo search window.
67
  - Keep `post_filter` simple:
68
  - exact match or `in` for returned fields like `runtime_stage`
69
  - `gte` / `lte` for normalized numeric fields like `num_params`, `downloads`, and `likes`
70
+ - `gte` / `lte` also work for normalized ISO timestamp fields like `created_at` and `last_modified`
71
  - `num_params` is one of the main valid reasons to use `post_filter` on model search today.
72
  - Do **not** use `post_filter` for things that already have first-class upstream params like `author`, `pipeline_tag`, `dataset_name`, `language`, `models`, or `datasets`.
73
 
74
+ ## Common repo fields
75
+
76
+ - `repo_id`
77
+ - `repo_type`
78
+ - `author`
79
+ - `likes`
80
+ - `downloads`
81
+ - `created_at`
82
+ - `last_modified`
83
+ - `num_params`
84
+ - `repo_url`
85
+ - model: `library_name`, `pipeline_tag`
86
+ - dataset: `description`, `paperswithcode_id`
87
+ - space: `sdk`, `models`, `datasets`, `subdomain`
88
+
89
+ ## Common collection fields
90
+
91
+ - `collection_id`
92
+ - `title`
93
+ - `owner`
94
+ - `description`
95
+ - `last_updated`
96
+ - `item_count`
97
+ - use `hf_collections_search(owner="<org-or-user>", ...)` for owner lookups
98
+
99
+ ## Common paper join points
100
+
101
+ - `organization`
102
+ - `submitted_by`
103
+ - `author_usernames`
104
+ - `discussion_id`
105
+
106
  Examples:
107
 
108
  ```py
109
+ result = await hf_models_search(pipeline_tag="text-to-image", limit=10)
110
+ result
111
+ ```
112
+
113
+ ```py
114
+ result = await hf_models_search(
115
  pipeline_tag="text-generation",
116
  sort="trending_score",
117
  limit=50,
118
  post_filter={"num_params": {"gte": 20_000_000_000, "lte": 80_000_000_000}},
119
  )
120
+ result
121
+ ```
122
+
123
+ ```py
124
+ result = await hf_collections_search(owner="Qwen", limit=10)
125
+ result
126
  ```
127
 
128
  Field-only pattern:
 
133
  fields=["repo_id", "author", "likes", "downloads", "repo_url"],
134
  limit=3,
135
  )
136
+ result = resp["items"]
137
+ result
138
  ```
139
 
140
  Coverage pattern:
 
147
  limit=20,
148
  fields=["repo_id", "repo_likes", "repo_url"],
149
  )
150
+ result = {"results": resp["items"], "coverage": resp["meta"]}
151
+ result
152
  ```
153
 
154
  Owner-inventory pattern:
 
164
  )
165
  meta = resp.get("meta") or {}
166
  if meta.get("limit_boundary_hit") or meta.get("more_available") not in {False, None}:
167
+ result = {"results": resp["items"], "coverage": {**meta, "profile_spaces_count": count}}
168
+ else:
169
+ result = resp["items"]
170
+ result
171
  ```
172
 
173
+ Bounded join pattern:
174
 
175
  ```py
176
+ followers_resp = await hf_user_graph(
 
 
 
 
 
 
 
 
 
 
 
177
  relation="followers",
178
+ limit=100,
179
+ scan_limit=100,
180
+ fields=["username", "fullname"],
181
  )
182
+ followers = followers_resp.get("items") or []
183
+ matches = []
184
+ for follower in followers:
185
+ username = follower.get("username")
186
+ fullname = follower.get("fullname")
187
+ starts_with_b = (
188
+ (isinstance(username, str) and username.lower().startswith("b"))
189
+ or (isinstance(fullname, str) and fullname.lower().startswith("b"))
190
+ )
191
+ if starts_with_b:
192
+ matches.append(follower)
193
+ remaining_profile_calls = max(0, max_calls - 1)
194
+ results = []
195
+ for follower in matches[:remaining_profile_calls]:
196
+ username = follower.get("username")
197
+ if not username:
198
+ continue
199
+ profile = await hf_profile_summary(handle=username)
200
+ item = profile.get("item") or {}
201
+ results.append(
202
+ {
203
+ "username": username,
204
+ "fullname": follower.get("fullname"),
205
+ "github_url": item.get("github_url"),
206
+ }
207
+ )
208
+ result = {
209
+ "results": results,
210
+ "coverage": {
211
+ "followers": followers_resp.get("meta") or {},
212
+ "matching_followers_seen": len(matches),
213
+ "profile_calls_used": len(results),
214
+ "profile_hydration_partial": len(matches) > len(results),
215
+ },
216
+ }
217
+ result
218
  ```
219
 
220
+ Use the same pattern for other bounded joins:
221
+ - actor list → filter locally → hydrate exact matches
222
+ - actor list → per-actor likes/details → aggregate under `results`
223
+ - preserve upstream helper `meta` under top-level `coverage` whenever partiality matters
224
+
225
  ## Navigation graph
226
 
227
  Use the helper that matches the question type.
 
232
  - space search/list/discovery → `hf_spaces_search(...)`
233
  - cross-type repo search → `hf_repo_search(...)`
234
  - trending repos → `hf_trending(...)`
235
+ - daily papers → `hf_daily_papers(...)`
236
+ - paper search → `hf_papers_search(...)`
237
+ - paper detail → `hf_paper_info(...)`
238
+ - paper markdown → `hf_read_paper(...)`
239
  - repo discussions → `hf_repo_discussions(...)`
240
  - specific discussion details → `hf_repo_discussion_details(...)`
241
  - users who liked one repo → `hf_repo_likers(...)`
242
+ - profile / overview / social/detail / aggregate counts → `hf_profile_summary(...)`
243
  - followers / following lists → `hf_user_graph(...)`
244
  - repos a user liked → `hf_user_likes(...)`
245
  - recent activity feed → `hf_recent_activity(...)`
 
271
  - `items` is the canonical list field.
272
  - `item` is just a singleton convenience.
273
  - `meta` contains helper-owned execution, limit, and coverage info.
 
274
 
275
  ## High-signal output rules
276
 
277
  - Prefer compact dict/list outputs over prose when the user asked for fields.
 
278
  - Use canonical snake_case keys in generated code and structured output.
279
  - Use `repo_id` as the display label for repos.
 
 
280
  - For joins/intersections/rankings, fetch the needed working set first and compute locally.
281
  - If the result is partial, use top-level keys `results` and `coverage`.
282
 
 
290
 
291
  await hf_collections_search(query: 'str | None' = None, owner: 'str | None' = None, limit: 'int' = 20, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
292
 
293
+ await hf_daily_papers(date: 'str | None' = None, week: 'str | None' = None, month: 'str | None' = None, submitter: 'str | None' = None, sort: 'str | None' = None, p: 'int | None' = None, limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
294
 
295
  await hf_datasets_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, benchmark: 'str | bool | None' = None, dataset_name: 'str | None' = None, gated: 'bool | None' = None, language_creators: 'str | list[str] | None' = None, language: 'str | list[str] | None' = None, multilinguality: 'str | list[str] | None' = None, size_categories: 'str | list[str] | None' = None, task_categories: 'str | list[str] | None' = None, task_ids: 'str | list[str] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
296
 
 
298
 
299
  await hf_org_members(organization: 'str', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
300
 
301
+ await hf_paper_info(paper_id: 'str', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
302
+
303
+ await hf_papers_search(query: 'str', limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
304
+
305
  await hf_profile_summary(handle: 'str | None' = None, include: 'list[str] | None' = None, likes_limit: 'int' = 10, activity_limit: 'int' = 10) -> 'dict[str, Any]'
306
 
307
+ await hf_read_paper(paper_id: 'str') -> 'dict[str, Any]'
308
+
309
  await hf_recent_activity(feed_type: 'str | None' = None, entity: 'str | None' = None, activity_types: 'list[str] | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, max_pages: 'int | None' = None, start_cursor: 'str | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
310
 
311
  await hf_repo_details(repo_id: 'str | None' = None, repo_ids: 'list[str] | None' = None, repo_type: 'str' = 'auto', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
 
387
  ### hf_daily_papers
388
 
389
  - category: `curated_feed`
390
+ - backed_by: `HfApi.list_daily_papers`
391
  - returns:
392
  - envelope: `{ok, item, items, meta, error}`
393
+ - row_type: `paper`
394
+ - default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
395
+ - guaranteed_fields: `paper_id`, `title`, `published_at`
396
+ - optional_fields: `summary`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
397
+ - supported_params: `date`, `week`, `month`, `submitter`, `sort`, `p`, `limit`, `where`, `fields`
398
+ - param_values:
399
+ - sort: `published_at`, `trending`
400
  - fields_contract:
401
+ - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
402
  - canonical_only: `true`
403
  - where_contract:
404
+ - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
405
  - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
406
  - normalized_only: `true`
407
  - limit_contract:
408
  - default_limit: `20`
409
  - max_limit: `500`
410
+ - notes: Curated daily papers feed backed by HfApi.list_daily_papers. Useful join points: organization, submitted_by, author_usernames, discussion_id.
411
 
412
  ### hf_datasets_search
413
 
 
482
  - scan_max: `10000`
483
  - notes: Returns organization member summary rows.
484
 
485
+ ### hf_paper_info
486
+
487
+ - category: `paper_detail`
488
+ - backed_by: `HfApi.paper_info`
489
+ - returns:
490
+ - envelope: `{ok, item, items, meta, error}`
491
+ - row_type: `paper`
492
+ - default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
493
+ - guaranteed_fields: `paper_id`, `title`, `published_at`
494
+ - optional_fields: `summary`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
495
+ - supported_params: `paper_id`, `fields`
496
+ - fields_contract:
497
+ - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
498
+ - canonical_only: `true`
499
+ - notes: Exact paper metadata helper backed by HfApi.paper_info.
500
+
501
+ ### hf_papers_search
502
+
503
+ - category: `paper_search`
504
+ - backed_by: `HfApi.list_papers`
505
+ - returns:
506
+ - envelope: `{ok, item, items, meta, error}`
507
+ - row_type: `paper`
508
+ - default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
509
+ - guaranteed_fields: `paper_id`, `title`, `published_at`
510
+ - optional_fields: `summary`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
511
+ - supported_params: `query`, `limit`, `where`, `fields`
512
+ - fields_contract:
513
+ - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
514
+ - canonical_only: `true`
515
+ - where_contract:
516
+ - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
517
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
518
+ - normalized_only: `true`
519
+ - limit_contract:
520
+ - default_limit: `20`
521
+ - max_limit: `500`
522
+ - notes: Paper search helper backed by HfApi.list_papers. Use organization, submitted_by, and author_usernames as the main Hub-native join points.
523
+
524
  ### hf_profile_summary
525
 
526
  - category: `profile_summary`
 
535
  - include: `likes`, `activity`
536
  - notes: Profile summary helper. Aggregate counts like followers_count/following_count are in the base item. include=['likes', 'activity'] adds composed samples and extra upstream work; no other include values are supported. Overview-owned repo counts may differ slightly from visible public search/list results.
537
 
538
+ ### hf_read_paper
539
+
540
+ - category: `paper_markdown`
541
+ - backed_by: `HfApi.read_paper`
542
+ - returns:
543
+ - envelope: `{ok, item, items, meta, error}`
544
+ - row_type: `paper_content`
545
+ - default_fields: `paper_id`, `content`
546
+ - guaranteed_fields: `paper_id`, `content`
547
+ - optional_fields: []
548
+ - supported_params: `paper_id`
549
+ - fields_contract:
550
+ - allowed_fields: `paper_id`, `content`
551
+ - canonical_only: `true`
552
+ - notes: Returns paper markdown content backed by HfApi.read_paper.
553
+
554
  ### hf_recent_activity
555
 
556
  - category: `activity_feed`
hf-hub-query.md CHANGED
@@ -1,19 +1,22 @@
1
  ---
2
  type: agent
3
  name: hf_hub_query
4
- model: gpt-oss
5
  use_history: false
6
  default: true
7
  description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound limit, scan_limit, max_pages, and ranking_window for brevity or broader coverage, and the tool can also be asked about its supported helpers, canonical fields, defaults, and coverage behavior."
8
  shell: false
9
  skills: []
10
  function_tools:
11
- - tool_entrypoints.py:hf_hub_query_raw
 
 
 
12
  request_params:
13
  tool_result_mode: passthrough
14
  ---
15
 
16
- reasoning: high
17
 
18
  You are a **tool-using, read-only** Hugging Face Hub search/navigation agent.
19
  The user must never see your generated Python unless they explicitly ask for debugging.
@@ -23,18 +26,23 @@ The user must never see your generated Python unless they explicitly ask for deb
23
  - Put the generated Python only in the tool's `code` argument.
24
  - Do **not** output planning text, pseudocode, code fences, or contract explanations before the tool call.
25
  - Only ask a brief clarification question if the request is genuinely ambiguous or missing required identity.
26
- - The generated program must define `async def solve(query, max_calls): ...` and end with `await solve(query, max_calls)`.
27
- - Use the original user request, or a tight restatement, as the tool `query`.
 
 
 
 
 
28
  - Do **not** pass explicit `max_calls` or `timeout_sec` tool arguments unless the user explicitly asked for a non-default budget/timeout. Let the runtime defaults apply for ordinary requests.
29
  - One user request = one `hf_hub_query_raw` call. Do **not** retry in the same turn.
30
 
31
  ## Raw return rules
32
- - The return value of `solve(...)` is the user-facing payload.
33
- - Return a dict/list when JSON is appropriate; return a string/number/bool only when that scalar is the intended payload.
34
  - For composed structured outputs that include your own coverage metadata, always use the exact top-level keys `results` and `coverage` unless the user explicitly asked for different key names.
35
- - Prefer returning outputs directly unless post-processing is required. Do **NOT** rename fields unless asked specifically.
36
- - Runtime will wrap the `solve(...)` return value under `result` and attach runtime information under `meta`.
37
  - When helper-owned coverage metadata matters, prefer returning the helper envelope directly.
38
- - Do **not** create your own transport wrapper such as `{result: ..., meta: ...}` inside `solve(...)`.
39
 
40
  {{file:_monty_codegen_shared.md}}
 
1
  ---
2
  type: agent
3
  name: hf_hub_query
4
+ model: hf.openai/gpt-oss-120b:sambanova
5
  use_history: false
6
  default: true
7
  description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound limit, scan_limit, max_pages, and ranking_window for brevity or broader coverage, and the tool can also be asked about its supported helpers, canonical fields, defaults, and coverage behavior."
8
  shell: false
9
  skills: []
10
  function_tools:
11
+ - entrypoint: tool_entrypoints.py:hf_hub_query_raw
12
+ variant: code
13
+ code_arg: code
14
+ language: python
15
  request_params:
16
  tool_result_mode: passthrough
17
  ---
18
 
19
+ reasoning: medium
20
 
21
  You are a **tool-using, read-only** Hugging Face Hub search/navigation agent.
22
  The user must never see your generated Python unless they explicitly ask for debugging.
 
26
  - Put the generated Python only in the tool's `code` argument.
27
  - Do **not** output planning text, pseudocode, code fences, or contract explanations before the tool call.
28
  - Only ask a brief clarification question if the request is genuinely ambiguous or missing required identity.
29
+ - The generated program is a top-level Monty Python script.
30
+ - `max_calls` is provided by the runtime as a top-level input.
31
+ - Always assign the final output to `result`.
32
+ - The final line must be exactly `result`.
33
+ - Never stop after `result = ...`; always add a final bare `result` line.
34
+ - Do **not** define or call `solve(...)`.
35
+ - The tool call only needs `code` unless you truly need optional raw-query metadata.
36
  - Do **not** pass explicit `max_calls` or `timeout_sec` tool arguments unless the user explicitly asked for a non-default budget/timeout. Let the runtime defaults apply for ordinary requests.
37
  - One user request = one `hf_hub_query_raw` call. Do **not** retry in the same turn.
38
 
39
  ## Raw return rules
40
+ - The value of `result` is the user-facing payload.
41
+ - Make `result` a dict/list when JSON is appropriate; use a string/number/bool only when that scalar is the intended payload.
42
  - For composed structured outputs that include your own coverage metadata, always use the exact top-level keys `results` and `coverage` unless the user explicitly asked for different key names.
43
+ - Prefer emitting outputs directly unless post-processing is required. Do **NOT** rename fields unless the user specifically asks.
44
+ - The runtime wraps the value of your `result` variable under a top-level `result` key and attaches runtime information under `meta`.
45
  - When helper-owned coverage metadata matters, prefer returning the helper envelope directly.
46
+ - Do **not** create your own transport wrapper such as `{result: ..., meta: ...}` in generated code.
47
 
48
  {{file:_monty_codegen_shared.md}}
monty_api/__pycache__/__init__.cpython-313.pyc DELETED
Binary file (741 Bytes)
 
monty_api/__pycache__/__init__.cpython-314.pyc DELETED
Binary file (371 Bytes)
 
monty_api/__pycache__/aliases.cpython-313.pyc DELETED
Binary file (901 Bytes)
 
monty_api/__pycache__/aliases.cpython-314.pyc DELETED
Binary file (828 Bytes)
 
monty_api/__pycache__/constants.cpython-313.pyc DELETED
Binary file (2.99 kB)
 
monty_api/__pycache__/constants.cpython-314.pyc DELETED
Binary file (2.97 kB)
 
monty_api/__pycache__/context_types.cpython-313.pyc DELETED
Binary file (1.34 kB)
 
monty_api/__pycache__/context_types.cpython-314.pyc DELETED
Binary file (1.6 kB)
 
monty_api/__pycache__/helper_contracts.cpython-313.pyc DELETED
Binary file (20.8 kB)
 
monty_api/__pycache__/helper_contracts.cpython-314.pyc DELETED
Binary file (23.5 kB)
 
monty_api/__pycache__/http_runtime.cpython-313.pyc DELETED
Binary file (28.5 kB)
 
monty_api/__pycache__/http_runtime.cpython-314.pyc DELETED
Binary file (33.2 kB)
 
monty_api/__pycache__/query_entrypoints.cpython-313.pyc DELETED
Binary file (15.5 kB)
 
monty_api/__pycache__/query_entrypoints.cpython-314.pyc DELETED
Binary file (17.9 kB)
 
monty_api/__pycache__/registry.cpython-313.pyc DELETED
Binary file (14.5 kB)
 
monty_api/__pycache__/registry.cpython-314.pyc DELETED
Binary file (15.6 kB)
 
monty_api/__pycache__/runtime_context.cpython-313.pyc DELETED
Binary file (12 kB)
 
monty_api/__pycache__/runtime_context.cpython-314.pyc DELETED
Binary file (13.5 kB)
 
monty_api/__pycache__/runtime_envelopes.cpython-313.pyc DELETED
Binary file (10.2 kB)
 
monty_api/__pycache__/runtime_envelopes.cpython-314.pyc DELETED
Binary file (12 kB)
 
monty_api/__pycache__/runtime_filtering.cpython-313.pyc DELETED
Binary file (8.69 kB)
 
monty_api/__pycache__/runtime_filtering.cpython-314.pyc DELETED
Binary file (10.6 kB)
 
monty_api/__pycache__/tool_entrypoints.cpython-313.pyc DELETED
Binary file (1.79 kB)
 
monty_api/__pycache__/tool_entrypoints.cpython-314.pyc DELETED
Binary file (1.98 kB)
 
monty_api/__pycache__/validation.cpython-313.pyc DELETED
Binary file (16.2 kB)
 
monty_api/__pycache__/validation.cpython-314.pyc DELETED
Binary file (18.6 kB)
 
monty_api/constants.py CHANGED
@@ -183,22 +183,24 @@ COLLECTION_CANONICAL_FIELDS: tuple[str, ...] = (
183
  "item_count",
184
  )
185
 
186
- DAILY_PAPER_CANONICAL_FIELDS: tuple[str, ...] = (
187
  "paper_id",
188
  "title",
189
  "summary",
190
  "published_at",
191
- "submitted_on_daily_at",
192
  "authors",
 
193
  "organization",
194
  "submitted_by",
195
  "discussion_id",
196
  "upvotes",
197
- "github_repo_url",
 
 
 
198
  "github_stars",
199
- "project_page_url",
200
- "num_comments",
201
- "is_author_participating",
202
- "repo_id",
203
  "rank",
204
  )
 
 
 
183
  "item_count",
184
  )
185
 
186
+ PAPER_CANONICAL_FIELDS: tuple[str, ...] = (
187
  "paper_id",
188
  "title",
189
  "summary",
190
  "published_at",
191
+ "submitted_at",
192
  "authors",
193
+ "author_usernames",
194
  "organization",
195
  "submitted_by",
196
  "discussion_id",
197
  "upvotes",
198
+ "source",
199
+ "comments",
200
+ "project_page",
201
+ "github_repo",
202
  "github_stars",
 
 
 
 
203
  "rank",
204
  )
205
+
206
+ PAPER_CONTENT_FIELDS: tuple[str, ...] = ("paper_id", "content")
monty_api/helper_contracts.py CHANGED
@@ -16,9 +16,10 @@ from .constants import (
16
  ACTIVITY_CANONICAL_FIELDS,
17
  ACTOR_CANONICAL_FIELDS,
18
  COLLECTION_CANONICAL_FIELDS,
19
- DAILY_PAPER_CANONICAL_FIELDS,
20
  DISCUSSION_CANONICAL_FIELDS,
21
  DISCUSSION_DETAIL_CANONICAL_FIELDS,
 
 
22
  PROFILE_CANONICAL_FIELDS,
23
  REPO_CANONICAL_FIELDS,
24
  USER_CANONICAL_FIELDS,
@@ -76,9 +77,10 @@ FIELD_GROUPS: dict[str, list[str]] = {
76
  "activity": list(ACTIVITY_CANONICAL_FIELDS),
77
  "actor": list(ACTOR_CANONICAL_FIELDS),
78
  "collection": list(COLLECTION_CANONICAL_FIELDS),
79
- "daily_paper": list(DAILY_PAPER_CANONICAL_FIELDS),
80
  "discussion": list(DISCUSSION_CANONICAL_FIELDS),
81
  "discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
 
 
82
  "profile": list(PROFILE_CANONICAL_FIELDS),
83
  "repo": list(REPO_CANONICAL_FIELDS),
84
  "trending_repo": list(TRENDING_CANONICAL_FIELDS),
@@ -109,10 +111,12 @@ HELPER_CONTRACT_SPECS: dict[str, dict[str, Any]] = {
109
  },
110
  "hf_daily_papers": {
111
  "category": "curated_feed",
112
- "row_type": "daily_paper",
113
- "fields_group": "daily_paper",
114
  "filter_param": "where",
115
- "filter_group": "daily_paper",
 
 
116
  },
117
  "hf_datasets_search": {
118
  "category": "wrapped_hf_repo_search",
@@ -142,6 +146,20 @@ HELPER_CONTRACT_SPECS: dict[str, dict[str, Any]] = {
142
  "row_type": "profile",
143
  "param_values": {"include": ["likes", "activity"]},
144
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  "hf_recent_activity": {
146
  "category": "activity_feed",
147
  "row_type": "activity",
@@ -189,6 +207,12 @@ HELPER_CONTRACT_SPECS: dict[str, dict[str, Any]] = {
189
  "row_type": "runtime_capability",
190
  "param_values": {"section": list(RUNTIME_CAPABILITY_SECTION_VALUES)},
191
  },
 
 
 
 
 
 
192
  "hf_spaces_search": {
193
  "category": "wrapped_hf_repo_search",
194
  "row_type": "repo",
@@ -396,6 +420,9 @@ def build_helper_contracts(
396
  param_values = _param_values_for_helper(helper_name)
397
  if param_values is not None:
398
  contract["param_values"] = param_values
 
 
 
399
 
400
  upstream_repo_type = spec.get("upstream_repo_type")
401
  if isinstance(upstream_repo_type, str):
 
16
  ACTIVITY_CANONICAL_FIELDS,
17
  ACTOR_CANONICAL_FIELDS,
18
  COLLECTION_CANONICAL_FIELDS,
 
19
  DISCUSSION_CANONICAL_FIELDS,
20
  DISCUSSION_DETAIL_CANONICAL_FIELDS,
21
+ PAPER_CANONICAL_FIELDS,
22
+ PAPER_CONTENT_FIELDS,
23
  PROFILE_CANONICAL_FIELDS,
24
  REPO_CANONICAL_FIELDS,
25
  USER_CANONICAL_FIELDS,
 
77
  "activity": list(ACTIVITY_CANONICAL_FIELDS),
78
  "actor": list(ACTOR_CANONICAL_FIELDS),
79
  "collection": list(COLLECTION_CANONICAL_FIELDS),
 
80
  "discussion": list(DISCUSSION_CANONICAL_FIELDS),
81
  "discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
82
+ "paper": list(PAPER_CANONICAL_FIELDS),
83
+ "paper_content": list(PAPER_CONTENT_FIELDS),
84
  "profile": list(PROFILE_CANONICAL_FIELDS),
85
  "repo": list(REPO_CANONICAL_FIELDS),
86
  "trending_repo": list(TRENDING_CANONICAL_FIELDS),
 
111
  },
112
  "hf_daily_papers": {
113
  "category": "curated_feed",
114
+ "row_type": "paper",
115
+ "fields_group": "paper",
116
  "filter_param": "where",
117
+ "filter_group": "paper",
118
+ "param_values": {"sort": ["published_at", "trending"]},
119
+ "backed_by": "HfApi.list_daily_papers",
120
  },
121
  "hf_datasets_search": {
122
  "category": "wrapped_hf_repo_search",
 
146
  "row_type": "profile",
147
  "param_values": {"include": ["likes", "activity"]},
148
  },
149
+ "hf_paper_info": {
150
+ "category": "paper_detail",
151
+ "row_type": "paper",
152
+ "fields_group": "paper",
153
+ "backed_by": "HfApi.paper_info",
154
+ },
155
+ "hf_papers_search": {
156
+ "category": "paper_search",
157
+ "row_type": "paper",
158
+ "fields_group": "paper",
159
+ "filter_param": "where",
160
+ "filter_group": "paper",
161
+ "backed_by": "HfApi.list_papers",
162
+ },
163
  "hf_recent_activity": {
164
  "category": "activity_feed",
165
  "row_type": "activity",
 
207
  "row_type": "runtime_capability",
208
  "param_values": {"section": list(RUNTIME_CAPABILITY_SECTION_VALUES)},
209
  },
210
+ "hf_read_paper": {
211
+ "category": "paper_markdown",
212
+ "row_type": "paper_content",
213
+ "fields_group": "paper_content",
214
+ "backed_by": "HfApi.read_paper",
215
+ },
216
  "hf_spaces_search": {
217
  "category": "wrapped_hf_repo_search",
218
  "row_type": "repo",
 
420
  param_values = _param_values_for_helper(helper_name)
421
  if param_values is not None:
422
  contract["param_values"] = param_values
423
+ backed_by = spec.get("backed_by")
424
+ if isinstance(backed_by, str):
425
+ contract["backed_by"] = backed_by
426
 
427
  upstream_repo_type = spec.get("upstream_repo_type")
428
  if isinstance(upstream_repo_type, str):
monty_api/helpers/__init__.py CHANGED
@@ -1,6 +1,7 @@
1
  from .activity import register_activity_helpers
2
  from .collections import register_collection_helpers
3
  from .introspection import register_introspection_helpers
 
4
  from .profiles import register_profile_helpers
5
  from .repos import register_repo_helpers
6
 
@@ -8,6 +9,7 @@ __all__ = [
8
  "register_activity_helpers",
9
  "register_collection_helpers",
10
  "register_introspection_helpers",
 
11
  "register_profile_helpers",
12
  "register_repo_helpers",
13
  ]
 
1
  from .activity import register_activity_helpers
2
  from .collections import register_collection_helpers
3
  from .introspection import register_introspection_helpers
4
+ from .papers import register_paper_helpers
5
  from .profiles import register_profile_helpers
6
  from .repos import register_repo_helpers
7
 
 
9
  "register_activity_helpers",
10
  "register_collection_helpers",
11
  "register_introspection_helpers",
12
+ "register_paper_helpers",
13
  "register_profile_helpers",
14
  "register_repo_helpers",
15
  ]
monty_api/helpers/__pycache__/__init__.cpython-313.pyc DELETED
Binary file (487 Bytes)
 
monty_api/helpers/__pycache__/__init__.cpython-314.pyc DELETED
Binary file (489 Bytes)
 
monty_api/helpers/__pycache__/activity.cpython-313.pyc DELETED
Binary file (8.71 kB)
 
monty_api/helpers/__pycache__/activity.cpython-314.pyc DELETED
Binary file (9.3 kB)
 
monty_api/helpers/__pycache__/collections.cpython-313.pyc DELETED
Binary file (12.7 kB)
 
monty_api/helpers/__pycache__/collections.cpython-314.pyc DELETED
Binary file (13.8 kB)
 
monty_api/helpers/__pycache__/common.cpython-313.pyc DELETED
Binary file (1.5 kB)
 
monty_api/helpers/__pycache__/common.cpython-314.pyc DELETED
Binary file (1.64 kB)
 
monty_api/helpers/__pycache__/introspection.cpython-313.pyc DELETED
Binary file (11.1 kB)
 
monty_api/helpers/__pycache__/introspection.cpython-314.pyc DELETED
Binary file (12.4 kB)
 
monty_api/helpers/__pycache__/profiles.cpython-313.pyc DELETED
Binary file (32.7 kB)
 
monty_api/helpers/__pycache__/profiles.cpython-314.pyc DELETED
Binary file (35.3 kB)
 
monty_api/helpers/__pycache__/repos.cpython-313.pyc DELETED
Binary file (49.3 kB)
 
monty_api/helpers/__pycache__/repos.cpython-314.pyc DELETED
Binary file (53.2 kB)
 
monty_api/helpers/introspection.py CHANGED
@@ -10,7 +10,6 @@ from ..constants import (
10
  ACTIVITY_CANONICAL_FIELDS,
11
  ACTOR_CANONICAL_FIELDS,
12
  COLLECTION_CANONICAL_FIELDS,
13
- DAILY_PAPER_CANONICAL_FIELDS,
14
  DISCUSSION_CANONICAL_FIELDS,
15
  DISCUSSION_DETAIL_CANONICAL_FIELDS,
16
  DEFAULT_MAX_CALLS,
@@ -19,6 +18,8 @@ from ..constants import (
19
  LIKES_SCAN_LIMIT_CAP,
20
  MAX_CALLS_LIMIT,
21
  OUTPUT_ITEMS_TRUNCATION_LIMIT,
 
 
22
  PROFILE_CANONICAL_FIELDS,
23
  RECENT_ACTIVITY_SCAN_MAX_PAGES,
24
  REPO_CANONICAL_FIELDS,
@@ -140,7 +141,8 @@ async def hf_runtime_capabilities(
140
  "user_likes": list(USER_LIKES_CANONICAL_FIELDS),
141
  "activity": list(ACTIVITY_CANONICAL_FIELDS),
142
  "collection": list(COLLECTION_CANONICAL_FIELDS),
143
- "daily_paper": list(DAILY_PAPER_CANONICAL_FIELDS),
 
144
  "discussion": list(DISCUSSION_CANONICAL_FIELDS),
145
  "discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
146
  },
 
10
  ACTIVITY_CANONICAL_FIELDS,
11
  ACTOR_CANONICAL_FIELDS,
12
  COLLECTION_CANONICAL_FIELDS,
 
13
  DISCUSSION_CANONICAL_FIELDS,
14
  DISCUSSION_DETAIL_CANONICAL_FIELDS,
15
  DEFAULT_MAX_CALLS,
 
18
  LIKES_SCAN_LIMIT_CAP,
19
  MAX_CALLS_LIMIT,
20
  OUTPUT_ITEMS_TRUNCATION_LIMIT,
21
+ PAPER_CANONICAL_FIELDS,
22
+ PAPER_CONTENT_FIELDS,
23
  PROFILE_CANONICAL_FIELDS,
24
  RECENT_ACTIVITY_SCAN_MAX_PAGES,
25
  REPO_CANONICAL_FIELDS,
 
141
  "user_likes": list(USER_LIKES_CANONICAL_FIELDS),
142
  "activity": list(ACTIVITY_CANONICAL_FIELDS),
143
  "collection": list(COLLECTION_CANONICAL_FIELDS),
144
+ "paper": list(PAPER_CANONICAL_FIELDS),
145
+ "paper_content": list(PAPER_CONTENT_FIELDS),
146
  "discussion": list(DISCUSSION_CANONICAL_FIELDS),
147
  "discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
148
  },
monty_api/helpers/papers.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from functools import partial
4
+ from typing import Any, Callable
5
+
6
+ from ..constants import OUTPUT_ITEMS_TRUNCATION_LIMIT, PAPER_CANONICAL_FIELDS
7
+ from ..context_types import HelperRuntimeContext
8
+
9
+
10
+ def _extract_author_usernames(authors: list[Any] | None) -> list[str] | None:
11
+ if not isinstance(authors, list):
12
+ return None
13
+ usernames: list[str] = []
14
+ for author in authors:
15
+ user = getattr(author, "user", None)
16
+ for candidate in (
17
+ getattr(user, "username", None),
18
+ getattr(user, "user", None),
19
+ getattr(user, "name", None),
20
+ ):
21
+ if isinstance(candidate, str):
22
+ cleaned = candidate.strip()
23
+ if cleaned and cleaned not in usernames:
24
+ usernames.append(cleaned)
25
+ break
26
+ return usernames or None
27
+
28
+
29
+ def _normalize_paper_sort(sort: str | None) -> tuple[str | None, str | None]:
30
+ cleaned = str(sort or "").strip()
31
+ if not cleaned:
32
+ return (None, None)
33
+ alias_map = {
34
+ "published_at": "publishedAt",
35
+ "publishedAt": "publishedAt",
36
+ "trending": "trending",
37
+ }
38
+ resolved = alias_map.get(cleaned)
39
+ if resolved is None:
40
+ return (None, "sort must be one of published_at, publishedAt, trending")
41
+ return (resolved, None)
42
+
43
+
44
+ def _normalize_paper_info(
45
+ ctx: HelperRuntimeContext,
46
+ paper: Any,
47
+ *,
48
+ rank: int | None = None,
49
+ ) -> dict[str, Any]:
50
+ authors = getattr(paper, "authors", None)
51
+ organization = getattr(paper, "organization", None)
52
+ submitted_by = getattr(paper, "submitted_by", None)
53
+ row = {
54
+ "paper_id": getattr(paper, "id", None),
55
+ "title": getattr(paper, "title", None),
56
+ "summary": getattr(paper, "summary", None),
57
+ "published_at": ctx._dt_to_str(getattr(paper, "published_at", None)),
58
+ "submitted_at": ctx._dt_to_str(getattr(paper, "submitted_at", None)),
59
+ "authors": ctx._extract_author_names(authors),
60
+ "author_usernames": _extract_author_usernames(authors),
61
+ "organization": ctx._extract_profile_name(organization),
62
+ "submitted_by": ctx._extract_profile_name(submitted_by),
63
+ "discussion_id": getattr(paper, "discussion_id", None),
64
+ "upvotes": ctx._as_int(getattr(paper, "upvotes", None)),
65
+ "source": getattr(paper, "source", None),
66
+ "comments": ctx._as_int(getattr(paper, "comments", None)),
67
+ "project_page": getattr(paper, "project_page", None),
68
+ "github_repo": getattr(paper, "github_repo", None),
69
+ "github_stars": ctx._as_int(getattr(paper, "github_stars", None)),
70
+ "ai_summary": getattr(paper, "ai_summary", None),
71
+ "ai_keywords": getattr(paper, "ai_keywords", None),
72
+ "rank": rank,
73
+ }
74
+ return row
75
+
76
+
77
+ async def _run_paper_list_helper(
78
+ ctx: HelperRuntimeContext,
79
+ *,
80
+ helper_name: str,
81
+ source: str,
82
+ loader: Callable[[Any, int], list[Any]],
83
+ limit: int,
84
+ where: dict[str, Any] | None,
85
+ fields: list[str] | None,
86
+ ordered_ranking: bool = False,
87
+ **meta: Any,
88
+ ) -> dict[str, Any]:
89
+ start_calls = ctx.call_count["n"]
90
+ default_limit = ctx._policy_int(helper_name, "default_limit", 20)
91
+ max_limit = ctx._policy_int(
92
+ helper_name, "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
93
+ )
94
+ requested_limit = limit
95
+ applied_limit = ctx._clamp_int(
96
+ limit,
97
+ default=default_limit,
98
+ minimum=1,
99
+ maximum=max_limit,
100
+ )
101
+ limit_meta = ctx._derive_limit_metadata(
102
+ requested_limit=requested_limit,
103
+ applied_limit=applied_limit,
104
+ default_limit_used=limit == default_limit,
105
+ )
106
+ api = ctx._get_hf_api_client()
107
+ try:
108
+ payload = ctx._host_hf_call(
109
+ source,
110
+ lambda: loader(api, applied_limit),
111
+ )
112
+ except Exception as exc:
113
+ return ctx._helper_error(start_calls=start_calls, source=source, error=exc)
114
+
115
+ items = [
116
+ _normalize_paper_info(ctx, paper, rank=index if ordered_ranking else None)
117
+ for index, paper in enumerate(payload[:applied_limit], start=1)
118
+ ]
119
+ try:
120
+ items = ctx._apply_where(items, where, allowed_fields=PAPER_CANONICAL_FIELDS)
121
+ except ValueError as exc:
122
+ return ctx._helper_error(start_calls=start_calls, source=source, error=exc)
123
+ matched = len(items)
124
+ try:
125
+ items = ctx._project_items(
126
+ items[:applied_limit],
127
+ fields,
128
+ allowed_fields=PAPER_CANONICAL_FIELDS,
129
+ )
130
+ except ValueError as exc:
131
+ return ctx._helper_error(start_calls=start_calls, source=source, error=exc)
132
+
133
+ limit_boundary_hit = len(payload) >= applied_limit
134
+ next_request_hint = None
135
+ if limit_boundary_hit:
136
+ next_request_hint = (
137
+ f"Increase limit above {applied_limit} to check whether more rows exist"
138
+ )
139
+
140
+ return ctx._helper_success(
141
+ start_calls=start_calls,
142
+ source=source,
143
+ items=items,
144
+ limit=applied_limit,
145
+ scanned=len(payload),
146
+ matched=matched,
147
+ returned=len(items),
148
+ ordered_ranking=ordered_ranking,
149
+ more_available="unknown" if limit_boundary_hit else False,
150
+ limit_boundary_hit=limit_boundary_hit,
151
+ next_request_hint=next_request_hint,
152
+ **limit_meta,
153
+ **meta,
154
+ )
155
+
156
+
157
+ async def hf_daily_papers(
158
+ ctx: HelperRuntimeContext,
159
+ date: str | None = None,
160
+ week: str | None = None,
161
+ month: str | None = None,
162
+ submitter: str | None = None,
163
+ sort: str | None = None,
164
+ p: int | None = None,
165
+ limit: int = 20,
166
+ where: dict[str, Any] | None = None,
167
+ fields: list[str] | None = None,
168
+ ) -> dict[str, Any]:
169
+ normalized_sort, sort_error = _normalize_paper_sort(sort)
170
+ if sort_error:
171
+ return ctx._helper_error(
172
+ start_calls=ctx.call_count["n"],
173
+ source="/api/daily_papers",
174
+ error=sort_error,
175
+ )
176
+ return await _run_paper_list_helper(
177
+ ctx,
178
+ helper_name="hf_daily_papers",
179
+ source="/api/daily_papers",
180
+ loader=lambda api, applied_limit: list(
181
+ api.list_daily_papers(
182
+ date=date,
183
+ week=week,
184
+ month=month,
185
+ submitter=submitter,
186
+ sort=normalized_sort,
187
+ p=p,
188
+ limit=applied_limit,
189
+ )
190
+ ),
191
+ limit=limit,
192
+ where=where,
193
+ fields=fields,
194
+ ordered_ranking=True,
195
+ date=date,
196
+ week=week,
197
+ month=month,
198
+ submitter=submitter,
199
+ sort=normalized_sort,
200
+ p=p,
201
+ )
202
+
203
+
204
+ async def hf_papers_search(
205
+ ctx: HelperRuntimeContext,
206
+ query: str,
207
+ limit: int = 20,
208
+ where: dict[str, Any] | None = None,
209
+ fields: list[str] | None = None,
210
+ ) -> dict[str, Any]:
211
+ term = str(query or "").strip()
212
+ if not term:
213
+ return ctx._helper_error(
214
+ start_calls=ctx.call_count["n"],
215
+ source="/api/papers/search",
216
+ error="query is required",
217
+ )
218
+ return await _run_paper_list_helper(
219
+ ctx,
220
+ helper_name="hf_papers_search",
221
+ source="/api/papers/search",
222
+ loader=lambda api, applied_limit: list(
223
+ api.list_papers(query=term, limit=applied_limit)
224
+ ),
225
+ limit=limit,
226
+ where=where,
227
+ fields=fields,
228
+ query=term,
229
+ )
230
+
231
+
232
+ async def hf_paper_info(
233
+ ctx: HelperRuntimeContext,
234
+ paper_id: str,
235
+ fields: list[str] | None = None,
236
+ ) -> dict[str, Any]:
237
+ start_calls = ctx.call_count["n"]
238
+ pid = str(paper_id or "").strip()
239
+ if not pid:
240
+ return ctx._helper_error(
241
+ start_calls=start_calls,
242
+ source="/api/papers/<paper_id>",
243
+ error="paper_id is required",
244
+ )
245
+ try:
246
+ paper = ctx._host_hf_call(
247
+ f"/api/papers/{pid}",
248
+ lambda: ctx._get_hf_api_client().paper_info(id=pid),
249
+ )
250
+ except Exception as exc:
251
+ return ctx._helper_error(
252
+ start_calls=start_calls,
253
+ source=f"/api/papers/{pid}",
254
+ error=exc,
255
+ paper_id=pid,
256
+ )
257
+ item = _normalize_paper_info(ctx, paper)
258
+ items = [item]
259
+ try:
260
+ items = ctx._project_items(items, fields, allowed_fields=PAPER_CANONICAL_FIELDS)
261
+ except ValueError as exc:
262
+ return ctx._helper_error(
263
+ start_calls=start_calls,
264
+ source=f"/api/papers/{pid}",
265
+ error=exc,
266
+ paper_id=pid,
267
+ )
268
+ return ctx._helper_success(
269
+ start_calls=start_calls,
270
+ source=f"/api/papers/{pid}",
271
+ items=items,
272
+ paper_id=pid,
273
+ returned=len(items),
274
+ matched=len(items),
275
+ )
276
+
277
+
278
+ async def hf_read_paper(
279
+ ctx: HelperRuntimeContext,
280
+ paper_id: str,
281
+ ) -> dict[str, Any]:
282
+ start_calls = ctx.call_count["n"]
283
+ pid = str(paper_id or "").strip()
284
+ if not pid:
285
+ return ctx._helper_error(
286
+ start_calls=start_calls,
287
+ source="/papers/<paper_id>.md",
288
+ error="paper_id is required",
289
+ )
290
+ try:
291
+ content = ctx._host_hf_call(
292
+ f"/papers/{pid}.md",
293
+ lambda: ctx._get_hf_api_client().read_paper(id=pid),
294
+ )
295
+ except Exception as exc:
296
+ return ctx._helper_error(
297
+ start_calls=start_calls,
298
+ source=f"/papers/{pid}.md",
299
+ error=exc,
300
+ paper_id=pid,
301
+ )
302
+ return ctx._helper_success(
303
+ start_calls=start_calls,
304
+ source=f"/papers/{pid}.md",
305
+ items=[{"paper_id": pid, "content": content}],
306
+ paper_id=pid,
307
+ returned=1,
308
+ matched=1,
309
+ )
310
+
311
+
312
+ def register_paper_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[..., Any]]:
313
+ return {
314
+ "hf_daily_papers": partial(hf_daily_papers, ctx),
315
+ "hf_papers_search": partial(hf_papers_search, ctx),
316
+ "hf_paper_info": partial(hf_paper_info, ctx),
317
+ "hf_read_paper": partial(hf_read_paper, ctx),
318
+ }
monty_api/helpers/profiles.py CHANGED
@@ -338,8 +338,8 @@ async def hf_org_members(
338
  )
339
  sample_complete = (
340
  exact_count
341
- and len(normalized) <= applied_limit
342
- and (not count_only or len(normalized) == 0)
343
  )
344
  more_available = ctx._derive_more_available(
345
  sample_complete=sample_complete,
@@ -372,13 +372,18 @@ async def hf_org_members(
372
  "organization": org,
373
  },
374
  limit_plan=limit_plan,
375
- matched_count=len(normalized),
376
  returned_count=len(items),
377
  exact_count=exact_count,
378
  count_only=count_only,
379
  sample_complete=sample_complete,
380
  more_available=more_available,
381
- scan_limit_hit=scan_limit_hit,
 
 
 
 
 
382
  )
383
  return ctx._helper_success(
384
  start_calls=start_calls, source=endpoint, items=items, meta=meta
@@ -573,8 +578,8 @@ async def _user_graph_helper(
573
  )
574
  sample_complete = (
575
  exact_count
576
- and len(normalized) <= applied_limit
577
- and (not count_only or len(normalized) == 0)
578
  )
579
  more_available = ctx._derive_more_available(
580
  sample_complete=sample_complete,
@@ -617,13 +622,18 @@ async def _user_graph_helper(
617
  "organization": u if entity_type == "organization" else None,
618
  },
619
  limit_plan=limit_plan,
620
- matched_count=len(normalized),
621
  returned_count=len(items),
622
  exact_count=exact_count,
623
  count_only=count_only,
624
  sample_complete=sample_complete,
625
  more_available=more_available,
626
- scan_limit_hit=scan_limit_hit,
 
 
 
 
 
627
  )
628
  return ctx._helper_success(
629
  start_calls=start_calls, source=endpoint, items=items, meta=meta
 
338
  )
339
  sample_complete = (
340
  exact_count
341
+ and total_matched <= applied_limit
342
+ and (not count_only or total_matched == 0)
343
  )
344
  more_available = ctx._derive_more_available(
345
  sample_complete=sample_complete,
 
372
  "organization": org,
373
  },
374
  limit_plan=limit_plan,
375
+ matched_count=total_matched,
376
  returned_count=len(items),
377
  exact_count=exact_count,
378
  count_only=count_only,
379
  sample_complete=sample_complete,
380
  more_available=more_available,
381
+ scan_limit_hit=scan_limit_hit
382
+ or (
383
+ overview_total is not None
384
+ and overview_total > observed_total
385
+ and observed_total >= scan_lim
386
+ ),
387
  )
388
  return ctx._helper_success(
389
  start_calls=start_calls, source=endpoint, items=items, meta=meta
 
578
  )
579
  sample_complete = (
580
  exact_count
581
+ and total_matched <= applied_limit
582
+ and (not count_only or total_matched == 0)
583
  )
584
  more_available = ctx._derive_more_available(
585
  sample_complete=sample_complete,
 
622
  "organization": u if entity_type == "organization" else None,
623
  },
624
  limit_plan=limit_plan,
625
+ matched_count=total_matched,
626
  returned_count=len(items),
627
  exact_count=exact_count,
628
  count_only=count_only,
629
  sample_complete=sample_complete,
630
  more_available=more_available,
631
+ scan_limit_hit=scan_limit_hit
632
+ or (
633
+ overview_total is not None
634
+ and overview_total > observed_total
635
+ and observed_total >= scan_lim
636
+ ),
637
  )
638
  return ctx._helper_success(
639
  start_calls=start_calls, source=endpoint, items=items, meta=meta
monty_api/helpers/repos.py CHANGED
@@ -7,7 +7,6 @@ from ..context_types import HelperRuntimeContext
7
  from ..helper_contracts import repo_expand_alias_map
8
  from ..constants import (
9
  ACTOR_CANONICAL_FIELDS,
10
- DAILY_PAPER_CANONICAL_FIELDS,
11
  EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
12
  LIKES_ENRICHMENT_MAX_REPOS,
13
  LIKES_RANKING_WINDOW_DEFAULT,
@@ -1287,62 +1286,6 @@ async def hf_trending(
1287
  )
1288
 
1289
 
1290
- async def hf_daily_papers(
1291
- ctx: HelperRuntimeContext,
1292
- limit: int = 20,
1293
- where: dict[str, Any] | None = None,
1294
- fields: list[str] | None = None,
1295
- ) -> dict[str, Any]:
1296
- start_calls = ctx.call_count["n"]
1297
- default_limit = ctx._policy_int("hf_daily_papers", "default_limit", 20)
1298
- max_limit = ctx._policy_int(
1299
- "hf_daily_papers", "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
1300
- )
1301
- lim = ctx._clamp_int(limit, default=default_limit, minimum=1, maximum=max_limit)
1302
- resp = ctx._host_raw_call("/api/daily_papers", params={"limit": lim})
1303
- if not resp.get("ok"):
1304
- return ctx._helper_error(
1305
- start_calls=start_calls,
1306
- source="/api/daily_papers",
1307
- error=resp.get("error") or "daily papers fetch failed",
1308
- )
1309
- payload = resp.get("data") if isinstance(resp.get("data"), list) else []
1310
- items: list[dict[str, Any]] = []
1311
- for idx, row in enumerate(payload[:lim], start=1):
1312
- if not isinstance(row, dict):
1313
- continue
1314
- items.append(ctx._normalize_daily_paper_row(row, rank=idx))
1315
- try:
1316
- items = ctx._apply_where(
1317
- items, where, allowed_fields=DAILY_PAPER_CANONICAL_FIELDS
1318
- )
1319
- except ValueError as exc:
1320
- return ctx._helper_error(
1321
- start_calls=start_calls,
1322
- source="/api/daily_papers",
1323
- error=exc,
1324
- )
1325
- matched = len(items)
1326
- try:
1327
- items = ctx._project_daily_paper_items(items[:lim], fields)
1328
- except ValueError as exc:
1329
- return ctx._helper_error(
1330
- start_calls=start_calls,
1331
- source="/api/daily_papers",
1332
- error=exc,
1333
- )
1334
- return ctx._helper_success(
1335
- start_calls=start_calls,
1336
- source="/api/daily_papers",
1337
- items=items,
1338
- limit=lim,
1339
- scanned=len(payload),
1340
- matched=matched,
1341
- returned=len(items),
1342
- ordered_ranking=True,
1343
- )
1344
-
1345
-
1346
  def register_repo_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[..., Any]]:
1347
  return {
1348
  "hf_models_search": partial(hf_models_search, ctx),
@@ -1355,5 +1298,4 @@ def register_repo_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[...,
1355
  "hf_repo_discussion_details": partial(hf_repo_discussion_details, ctx),
1356
  "hf_repo_details": partial(hf_repo_details, ctx),
1357
  "hf_trending": partial(hf_trending, ctx),
1358
- "hf_daily_papers": partial(hf_daily_papers, ctx),
1359
  }
 
7
  from ..helper_contracts import repo_expand_alias_map
8
  from ..constants import (
9
  ACTOR_CANONICAL_FIELDS,
 
10
  EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
11
  LIKES_ENRICHMENT_MAX_REPOS,
12
  LIKES_RANKING_WINDOW_DEFAULT,
 
1286
  )
1287
 
1288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1289
  def register_repo_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[..., Any]]:
1290
  return {
1291
  "hf_models_search": partial(hf_models_search, ctx),
 
1298
  "hf_repo_discussion_details": partial(hf_repo_discussion_details, ctx),
1299
  "hf_repo_details": partial(hf_repo_details, ctx),
1300
  "hf_trending": partial(hf_trending, ctx),
 
1301
  }
monty_api/http_runtime.py CHANGED
@@ -429,47 +429,6 @@ def _normalize_trending_row(
429
  return row
430
 
431
 
432
- def _normalize_daily_paper_row(
433
- row: dict[str, Any], rank: int | None = None
434
- ) -> dict[str, Any]:
435
- paper = row.get("paper") if isinstance(row.get("paper"), dict) else {}
436
- org = (
437
- row.get("organization")
438
- if isinstance(row.get("organization"), dict)
439
- else paper.get("organization")
440
- )
441
- organization = None
442
- if isinstance(org, dict):
443
- organization = org.get("name") or org.get("fullname")
444
-
445
- item = {
446
- "paper_id": paper.get("id"),
447
- "title": row.get("title") or paper.get("title"),
448
- "summary": row.get("summary")
449
- or paper.get("summary")
450
- or paper.get("ai_summary"),
451
- "published_at": row.get("publishedAt") or paper.get("publishedAt"),
452
- "submitted_on_daily_at": paper.get("submittedOnDailyAt"),
453
- "authors": _extract_author_names(paper.get("authors")),
454
- "organization": organization,
455
- "submitted_by": _extract_profile_name(
456
- row.get("submittedBy") or paper.get("submittedOnDailyBy")
457
- ),
458
- "discussion_id": paper.get("discussionId"),
459
- "upvotes": _as_int(paper.get("upvotes")),
460
- "github_repo_url": paper.get("githubRepo"),
461
- "github_stars": _as_int(paper.get("githubStars")),
462
- "project_page_url": paper.get("projectPage"),
463
- "num_comments": _as_int(row.get("numComments")),
464
- "is_author_participating": row.get("isAuthorParticipating")
465
- if isinstance(row.get("isAuthorParticipating"), bool)
466
- else None,
467
- "repo_id": row.get("repo_id") or paper.get("repo_id"),
468
- "rank": rank,
469
- }
470
- return item
471
-
472
-
473
  def _normalize_collection_repo_item(row: dict[str, Any]) -> dict[str, Any] | None:
474
  repo_id = row.get("id") or row.get("repoId") or row.get("repo_id")
475
  if not isinstance(repo_id, str) or not repo_id:
 
429
  return row
430
 
431
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432
  def _normalize_collection_repo_item(row: dict[str, Any]) -> dict[str, Any] | None:
433
  repo_id = row.get("id") or row.get("repoId") or row.get("repo_id")
434
  if not isinstance(repo_id, str) or not repo_id: