Spaces:

evalstate
/

hf-hub-query

Running

App Files Files Community

evalstate committed on Apr 9

Commit

8dd9efe

1 Parent(s): 376676a

Update Monty paper helpers and prompt surface

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

_monty_codegen_shared.md +203 -54
hf-hub-query.md +18 -10
monty_api/__pycache__/__init__.cpython-313.pyc +0 -0
monty_api/__pycache__/__init__.cpython-314.pyc +0 -0
monty_api/__pycache__/aliases.cpython-313.pyc +0 -0
monty_api/__pycache__/aliases.cpython-314.pyc +0 -0
monty_api/__pycache__/constants.cpython-313.pyc +0 -0
monty_api/__pycache__/constants.cpython-314.pyc +0 -0
monty_api/__pycache__/context_types.cpython-313.pyc +0 -0
monty_api/__pycache__/context_types.cpython-314.pyc +0 -0
monty_api/__pycache__/helper_contracts.cpython-313.pyc +0 -0
monty_api/__pycache__/helper_contracts.cpython-314.pyc +0 -0
monty_api/__pycache__/http_runtime.cpython-313.pyc +0 -0
monty_api/__pycache__/http_runtime.cpython-314.pyc +0 -0
monty_api/__pycache__/query_entrypoints.cpython-313.pyc +0 -0
monty_api/__pycache__/query_entrypoints.cpython-314.pyc +0 -0
monty_api/__pycache__/registry.cpython-313.pyc +0 -0
monty_api/__pycache__/registry.cpython-314.pyc +0 -0
monty_api/__pycache__/runtime_context.cpython-313.pyc +0 -0
monty_api/__pycache__/runtime_context.cpython-314.pyc +0 -0
monty_api/__pycache__/runtime_envelopes.cpython-313.pyc +0 -0
monty_api/__pycache__/runtime_envelopes.cpython-314.pyc +0 -0
monty_api/__pycache__/runtime_filtering.cpython-313.pyc +0 -0
monty_api/__pycache__/runtime_filtering.cpython-314.pyc +0 -0
monty_api/__pycache__/tool_entrypoints.cpython-313.pyc +0 -0
monty_api/__pycache__/tool_entrypoints.cpython-314.pyc +0 -0
monty_api/__pycache__/validation.cpython-313.pyc +0 -0
monty_api/__pycache__/validation.cpython-314.pyc +0 -0
monty_api/constants.py +9 -7
monty_api/helper_contracts.py +32 -5
monty_api/helpers/__init__.py +2 -0
monty_api/helpers/__pycache__/__init__.cpython-313.pyc +0 -0
monty_api/helpers/__pycache__/__init__.cpython-314.pyc +0 -0
monty_api/helpers/__pycache__/activity.cpython-313.pyc +0 -0
monty_api/helpers/__pycache__/activity.cpython-314.pyc +0 -0
monty_api/helpers/__pycache__/collections.cpython-313.pyc +0 -0
monty_api/helpers/__pycache__/collections.cpython-314.pyc +0 -0
monty_api/helpers/__pycache__/common.cpython-313.pyc +0 -0
monty_api/helpers/__pycache__/common.cpython-314.pyc +0 -0
monty_api/helpers/__pycache__/introspection.cpython-313.pyc +0 -0
monty_api/helpers/__pycache__/introspection.cpython-314.pyc +0 -0
monty_api/helpers/__pycache__/profiles.cpython-313.pyc +0 -0
monty_api/helpers/__pycache__/profiles.cpython-314.pyc +0 -0
monty_api/helpers/__pycache__/repos.cpython-313.pyc +0 -0
monty_api/helpers/__pycache__/repos.cpython-314.pyc +0 -0
monty_api/helpers/introspection.py +4 -2
monty_api/helpers/papers.py +318 -0
monty_api/helpers/profiles.py +18 -8
monty_api/helpers/repos.py +0 -58
monty_api/http_runtime.py +0 -41

_monty_codegen_shared.md CHANGED Viewed

@@ -3,24 +3,31 @@
 - You are writing Python to be executed in a secure runtime environment.
 - **NEVER** use `import` - it is NOT available in this environment.
 - All helper calls are async: always use `await`.
-- Use this exact outer shape:
 ```py
-async def solve(query, max_calls):
-    ...
-await solve(query, max_calls)
 ```
 - `max_calls` is the total external-call budget for the whole program.
 - Use only documented `hf_*` helpers.
-- Return plain Python data only: `dict`, `list`, `str`, `int`, `float`, `bool`, or `None`.
-- Do **not** hand-build JSON strings or markdown strings inside `solve(...)` unless the user explicitly asked for prose.
-- Do **not** build your own transport wrapper like `{result: ..., meta: ...}`.
-- If the user says "return only" some fields, return exactly that final shape.
-- If a helper already returns the requested row shape, return `resp["items"]` directly **only when helper coverage is clearly complete**. If helper `meta` suggests partial/unknown coverage, return `{"results": resp["items"], "coverage": resp["meta"]}` instead of bare items.
 - For current-user prompts (`my`, `me`), try helpers with `username=None` / `handle=None` first.
-- If a current-user helper returns `ok=false`, return that helper response directly.
 ## Search rules
@@ -41,35 +48,81 @@ await solve(query, max_calls)
 - `hf_user_likes(...)` already returns full normalized like rows by default; omit `fields` unless the user asked for a subset.
 - When sorting `hf_user_likes(...)` by `repo_likes` or `repo_downloads`, set `ranking_window=50` unless the user explicitly asked for a narrower recent window.
 - For human-facing follower/member/liker lists without an explicit requested count, prefer `limit=100` and return coverage when more may exist.
 - Unknown `fields` / `where` keys now fail fast. Use only canonical field names.
 - Ownership phrasing like "what collections does Qwen have", "collections by Qwen", or "collections owned by Qwen" means an owner lookup, so use `hf_collections_search(owner="Qwen")`, not a keyword-only `query="Qwen"` search.
 - Ownership phrasing like "what spaces does X have", "what models does X have", or "what datasets does X have" means an author/owner inventory lookup, so use `hf_spaces_search(author="X")`, `hf_models_search(author="X")`, or `hf_datasets_search(author="X")` rather than a global keyword-only search.
-- Owner/user/org handles may arrive with different casing in the user message; when a handle spelling is uncertain, prefer owner-oriented logic and, if needed, add fallback inside `solve(...)` that broadens to `query=...` and filters owners case-insensitively.
 - For exact aggregate counts like "how many models/datasets/spaces does X have", prefer `hf_profile_summary(...)['item']` counts. Those overview-owned counts may differ slightly from visible public search/list results, so if the user also asked for the list, preserve that distinction.
 - For owner inventory queries without an explicit requested count, use `hf_profile_summary(...)` first when a specific owner is known. If the count is modest, use it to size the follow-up list call; otherwise return a bounded list plus coverage instead of pretending completeness.
 - Think like `huggingface_hub`: `search`, `filter`, `author`, repo-type-specific upstream params, then `fields`.
 - Push constraints upstream whenever a first-class helper argument exists.
 - `post_filter` is only for normalized row filters that cannot be pushed upstream.
 - Keep `post_filter` simple:
   - exact match or `in` for returned fields like `runtime_stage`
   - `gte` / `lte` for normalized numeric fields like `num_params`, `downloads`, and `likes`
 - `num_params` is one of the main valid reasons to use `post_filter` on model search today.
 - Do **not** use `post_filter` for things that already have first-class upstream params like `author`, `pipeline_tag`, `dataset_name`, `language`, `models`, or `datasets`.
 Examples:
 ```py
-await hf_models_search(pipeline_tag="text-to-image", limit=10)
-await hf_datasets_search(search="speech", sort="downloads", limit=10)
-await hf_spaces_search(post_filter={"runtime_stage": {"in": ["BUILD_ERROR", "RUNTIME_ERROR"]}})
-await hf_models_search(
     pipeline_tag="text-generation",
     sort="trending_score",
     limit=50,
     post_filter={"num_params": {"gte": 20_000_000_000, "lte": 80_000_000_000}},
 )
-await hf_collections_search(owner="Qwen", limit=10)
 ```
 Field-only pattern:
@@ -80,7 +133,8 @@ resp = await hf_models_search(
     fields=["repo_id", "author", "likes", "downloads", "repo_url"],
     limit=3,
 )
-return resp["items"]
 ```
 Coverage pattern:
@@ -93,7 +147,8 @@ resp = await hf_user_likes(
     limit=20,
     fields=["repo_id", "repo_likes", "repo_url"],
 )
-return {"results": resp["items"], "coverage": resp["meta"]}
 ```
 Owner-inventory pattern:
@@ -109,33 +164,64 @@ resp = await hf_spaces_search(
 )
 meta = resp.get("meta") or {}
 if meta.get("limit_boundary_hit") or meta.get("more_available") not in {False, None}:
-    return {"results": resp["items"], "coverage": {**meta, "profile_spaces_count": count}}
-return resp["items"]
 ```
-Profile-count pattern:
 ```py
-profile = await hf_profile_summary(handle="mishig")
-item = profile["item"] or {}
-return {
-    "followers_count": item.get("followers_count"),
-    "following_count": item.get("following_count"),
-}
-```
-Pro-followers pattern:
-```py
-followers = await hf_user_graph(
     relation="followers",
-    pro_only=True,
-    limit=20,
-    fields=["username"],
 )
-return followers["items"]
 ```
 ## Navigation graph
 Use the helper that matches the question type.
@@ -146,11 +232,14 @@ Use the helper that matches the question type.
 - space search/list/discovery → `hf_spaces_search(...)`
 - cross-type repo search → `hf_repo_search(...)`
 - trending repos → `hf_trending(...)`
-- daily papers → `hf_daily_papers(...)`
 - repo discussions → `hf_repo_discussions(...)`
 - specific discussion details → `hf_repo_discussion_details(...)`
 - users who liked one repo → `hf_repo_likers(...)`
-- profile / overview / aggregate counts → `hf_profile_summary(...)`
 - followers / following lists → `hf_user_graph(...)`
 - repos a user liked → `hf_user_likes(...)`
 - recent activity feed → `hf_recent_activity(...)`
@@ -182,16 +271,12 @@ Rules:
 - `items` is the canonical list field.
 - `item` is just a singleton convenience.
 - `meta` contains helper-owned execution, limit, and coverage info.
-- When helper-owned coverage matters, prefer returning the helper envelope directly.
 ## High-signal output rules
 - Prefer compact dict/list outputs over prose when the user asked for fields.
-- Prefer summary helpers before detail hydration.
 - Use canonical snake_case keys in generated code and structured output.
 - Use `repo_id` as the display label for repos.
-- Use `hf_profile_summary(...)['item']` for aggregate counts such as followers, following, models, datasets, and spaces.
-- For selective one-shot search helpers, treat `meta.limit_boundary_hit=true` as a partial/unknown-coverage warning even if `meta.truncated` is still `false`.
 - For joins/intersections/rankings, fetch the needed working set first and compute locally.
 - If the result is partial, use top-level keys `results` and `coverage`.
@@ -205,7 +290,7 @@ await hf_collection_items(collection_id: 'str', repo_types: 'list[str] | None' =
 await hf_collections_search(query: 'str | None' = None, owner: 'str | None' = None, limit: 'int' = 20, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
-await hf_daily_papers(limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
 await hf_datasets_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, benchmark: 'str | bool | None' = None, dataset_name: 'str | None' = None, gated: 'bool | None' = None, language_creators: 'str | list[str] | None' = None, language: 'str | list[str] | None' = None, multilinguality: 'str | list[str] | None' = None, size_categories: 'str | list[str] | None' = None, task_categories: 'str | list[str] | None' = None, task_ids: 'str | list[str] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
@@ -213,8 +298,14 @@ await hf_models_search(search: 'str | None' = None, filter: 'str | list[str] | N
 await hf_org_members(organization: 'str', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
 await hf_profile_summary(handle: 'str | None' = None, include: 'list[str] | None' = None, likes_limit: 'int' = 10, activity_limit: 'int' = 10) -> 'dict[str, Any]'
 await hf_recent_activity(feed_type: 'str | None' = None, entity: 'str | None' = None, activity_types: 'list[str] | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, max_pages: 'int | None' = None, start_cursor: 'str | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
 await hf_repo_details(repo_id: 'str | None' = None, repo_ids: 'list[str] | None' = None, repo_type: 'str' = 'auto', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
@@ -296,24 +387,27 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
 ### hf_daily_papers
 - category: `curated_feed`
 - returns:
   - envelope: `{ok, item, items, meta, error}`
-  - row_type: `daily_paper`
-  - default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
-  - guaranteed_fields: `paper_id`, `title`, `published_at`, `rank`
-  - optional_fields: `summary`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`
-- supported_params: `limit`, `where`, `fields`
 - fields_contract:
-  - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
   - canonical_only: `true`
 - where_contract:
-  - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
   - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
   - normalized_only: `true`
 - limit_contract:
   - default_limit: `20`
   - max_limit: `500`
-- notes: Returns daily paper summary rows. repo_id is omitted unless the upstream payload provides it.
 ### hf_datasets_search
@@ -388,6 +482,45 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
   - scan_max: `10000`
 - notes: Returns organization member summary rows.
 ### hf_profile_summary
 - category: `profile_summary`
@@ -402,6 +535,22 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
   - include: `likes`, `activity`
 - notes: Profile summary helper. Aggregate counts like followers_count/following_count are in the base item. include=['likes', 'activity'] adds composed samples and extra upstream work; no other include values are supported. Overview-owned repo counts may differ slightly from visible public search/list results.
 ### hf_recent_activity
 - category: `activity_feed`

 - You are writing Python to be executed in a secure runtime environment.
 - **NEVER** use `import` - it is NOT available in this environment.
 - All helper calls are async: always use `await`.
+- Write a top-level Monty Python script. Use a shape like:
 ```py
+resp = await hf_models_search(limit=min(max_calls, 10))
+result = resp["items"]
+result
 ```
+- `max_calls` is a runtime-provided top-level input.
 - `max_calls` is the total external-call budget for the whole program.
+- Always assign the final output to `result`.
+- End the script with a final line containing only `result`.
+- Never stop after `result = ...`; always add a final bare `result` line.
+- Do **not** define or call `solve(...)`.
 - Use only documented `hf_*` helpers.
+- `result` must be plain Python data only: `dict`, `list`, `str`, `int`, `float`, `bool`, or `None`.
+- Do **not** hand-build JSON strings, markdown strings, or your own transport wrapper like `{result: ..., meta: ...}` unless the user explicitly asked for prose.
+- If the user says "return only" some fields, make `result` exactly that shape.
+- If a helper already returns the requested row shape, use `resp["items"]` directly **only when helper coverage is clearly complete**. If helper `meta` suggests partial/unknown coverage, set `result = {"results": resp["items"], "coverage": resp["meta"]}` instead of bare items.
 - For current-user prompts (`my`, `me`), try helpers with `username=None` / `handle=None` first.
+- For current-user follower/following aggregation prompts, prefer `hf_user_graph(relation=..., ...)` directly instead of `hf_whoami()` plus a second graph call. This saves a call and avoids unnecessary branching.
+- If a current-user helper returns `ok=false`, assign that helper response to `result`.
+- For relationship / aggregation questions (followers, members, likes, likers, intersections), preserve attribution in `result` unless the user explicitly asked for a collapsed deduped list.
+- Do **not** choose tiny hard-coded limits like `5` for follower/member/likes aggregation unless the user explicitly asked for a tiny sample. Prefer larger limits and preserve coverage when partial.
+- If you branch on an error path, you must still end the module with a final top-level bare `result` line outside every `if` / loop.
 ## Search rules
 - `hf_user_likes(...)` already returns full normalized like rows by default; omit `fields` unless the user asked for a subset.
 - When sorting `hf_user_likes(...)` by `repo_likes` or `repo_downloads`, set `ranking_window=50` unless the user explicitly asked for a narrower recent window.
 - For human-facing follower/member/liker lists without an explicit requested count, prefer `limit=100` and return coverage when more may exist.
+- For follower/following/member/liker queries that require local filtering on actor fields such as `username` or `fullname`, prefer a bounded scan like `limit=100` / `scan_limit=100` by default, or at most about `200` when a slightly broader sample is justified. Do **not** jump to `1000` unless the user explicitly asked for exhaustive coverage or a very large sample.
 - Unknown `fields` / `where` keys now fail fast. Use only canonical field names.
 - Ownership phrasing like "what collections does Qwen have", "collections by Qwen", or "collections owned by Qwen" means an owner lookup, so use `hf_collections_search(owner="Qwen")`, not a keyword-only `query="Qwen"` search.
+- `hf_collections_search(owner=...)` filters owners case-insensitively, so preserve the user-provided owner spelling but use the owner argument directly.
 - Ownership phrasing like "what spaces does X have", "what models does X have", or "what datasets does X have" means an author/owner inventory lookup, so use `hf_spaces_search(author="X")`, `hf_models_search(author="X")`, or `hf_datasets_search(author="X")` rather than a global keyword-only search.
+- For paper discovery, use `hf_papers_search(...)` for search, `hf_daily_papers(...)` for the curated daily feed, `hf_paper_info(...)` for exact metadata, and `hf_read_paper(...)` for markdown content.
+- The main Hub-native join points on paper rows are `organization`, `submitted_by`, and `author_usernames`. Papers do not expose first-class model/dataset/space repo IDs.
+- For profile/detail/social questions about a user or org — bio, description, display name, website, GitHub, Twitter/X, LinkedIn, Bluesky, organizations, or pro status — use `hf_profile_summary(...)` first.
+- For join-style questions that need profile details for followers, following, members, likers, or other actor lists, first fetch a **bounded** actor list, filter locally on actor fields like `username` / `fullname`, then hydrate only the bounded matches with `hf_profile_summary(...)`.
+- Do **not** set the initial actor-list limit equal to the whole remaining call budget when each match needs a follow-up profile lookup; reserve budget for the profile-detail calls and return coverage if the hydration step is partial.
 - For exact aggregate counts like "how many models/datasets/spaces does X have", prefer `hf_profile_summary(...)['item']` counts. Those overview-owned counts may differ slightly from visible public search/list results, so if the user also asked for the list, preserve that distinction.
 - For owner inventory queries without an explicit requested count, use `hf_profile_summary(...)` first when a specific owner is known. If the count is modest, use it to size the follow-up list call; otherwise return a bounded list plus coverage instead of pretending completeness.
 - Think like `huggingface_hub`: `search`, `filter`, `author`, repo-type-specific upstream params, then `fields`.
 - Push constraints upstream whenever a first-class helper argument exists.
 - `post_filter` is only for normalized row filters that cannot be pushed upstream.
+- For created/updated date constraints, pair local `post_filter` with the matching sort (`created_at` or `last_modified`). Do **not** rely on date-only `post_filter` over an unsorted repo search window.
 - Keep `post_filter` simple:
   - exact match or `in` for returned fields like `runtime_stage`
   - `gte` / `lte` for normalized numeric fields like `num_params`, `downloads`, and `likes`
+  - `gte` / `lte` also work for normalized ISO timestamp fields like `created_at` and `last_modified`
 - `num_params` is one of the main valid reasons to use `post_filter` on model search today.
 - Do **not** use `post_filter` for things that already have first-class upstream params like `author`, `pipeline_tag`, `dataset_name`, `language`, `models`, or `datasets`.
+## Common repo fields
+- `repo_id`
+- `repo_type`
+- `author`
+- `likes`
+- `downloads`
+- `created_at`
+- `last_modified`
+- `num_params`
+- `repo_url`
+- model: `library_name`, `pipeline_tag`
+- dataset: `description`, `paperswithcode_id`
+- space: `sdk`, `models`, `datasets`, `subdomain`
+## Common collection fields
+- `collection_id`
+- `title`
+- `owner`
+- `description`
+- `last_updated`
+- `item_count`
+- use `hf_collections_search(owner="<org-or-user>", ...)` for owner lookups
+## Common paper join points
+- `organization`
+- `submitted_by`
+- `author_usernames`
+- `discussion_id`
 Examples:
 ```py
+result = await hf_models_search(pipeline_tag="text-to-image", limit=10)
+result
+```
+```py
+result = await hf_models_search(
     pipeline_tag="text-generation",
     sort="trending_score",
     limit=50,
     post_filter={"num_params": {"gte": 20_000_000_000, "lte": 80_000_000_000}},
 )
+result
+```
+```py
+result = await hf_collections_search(owner="Qwen", limit=10)
+result
 ```
 Field-only pattern:
     fields=["repo_id", "author", "likes", "downloads", "repo_url"],
     limit=3,
 )
+result = resp["items"]
+result
 ```
 Coverage pattern:
     limit=20,
     fields=["repo_id", "repo_likes", "repo_url"],
 )
+result = {"results": resp["items"], "coverage": resp["meta"]}
+result
 ```
 Owner-inventory pattern:
 )
 meta = resp.get("meta") or {}
 if meta.get("limit_boundary_hit") or meta.get("more_available") not in {False, None}:
+    result = {"results": resp["items"], "coverage": {**meta, "profile_spaces_count": count}}
+else:
+    result = resp["items"]
+result
 ```
+Bounded join pattern:
 ```py
+followers_resp = await hf_user_graph(
     relation="followers",
+    limit=100,
+    scan_limit=100,
+    fields=["username", "fullname"],
 )
+followers = followers_resp.get("items") or []
+matches = []
+for follower in followers:
+    username = follower.get("username")
+    fullname = follower.get("fullname")
+    starts_with_b = (
+        (isinstance(username, str) and username.lower().startswith("b"))
+        or (isinstance(fullname, str) and fullname.lower().startswith("b"))
+    )
+    if starts_with_b:
+        matches.append(follower)
+remaining_profile_calls = max(0, max_calls - 1)
+results = []
+for follower in matches[:remaining_profile_calls]:
+    username = follower.get("username")
+    if not username:
+        continue
+    profile = await hf_profile_summary(handle=username)
+    item = profile.get("item") or {}
+    results.append(
+        {
+            "username": username,
+            "fullname": follower.get("fullname"),
+            "github_url": item.get("github_url"),
+        }
+    )
+result = {
+    "results": results,
+    "coverage": {
+        "followers": followers_resp.get("meta") or {},
+        "matching_followers_seen": len(matches),
+        "profile_calls_used": len(results),
+        "profile_hydration_partial": len(matches) > len(results),
+    },
+}
+result
 ```
+Use the same pattern for other bounded joins:
+- actor list → filter locally → hydrate exact matches
+- actor list → per-actor likes/details → aggregate under `results`
+- preserve upstream helper `meta` under top-level `coverage` whenever partiality matters
 ## Navigation graph
 Use the helper that matches the question type.
 - space search/list/discovery → `hf_spaces_search(...)`
 - cross-type repo search → `hf_repo_search(...)`
 - trending repos → `hf_trending(...)`
+- daily papers → `hf_daily_papers(...)`
+- paper search → `hf_papers_search(...)`
+- paper detail → `hf_paper_info(...)`
+- paper markdown → `hf_read_paper(...)`
 - repo discussions → `hf_repo_discussions(...)`
 - specific discussion details → `hf_repo_discussion_details(...)`
 - users who liked one repo → `hf_repo_likers(...)`
+- profile / overview / social/detail / aggregate counts → `hf_profile_summary(...)`
 - followers / following lists → `hf_user_graph(...)`
 - repos a user liked → `hf_user_likes(...)`
 - recent activity feed → `hf_recent_activity(...)`
 - `items` is the canonical list field.
 - `item` is just a singleton convenience.
 - `meta` contains helper-owned execution, limit, and coverage info.
 ## High-signal output rules
 - Prefer compact dict/list outputs over prose when the user asked for fields.
 - Use canonical snake_case keys in generated code and structured output.
 - Use `repo_id` as the display label for repos.
 - For joins/intersections/rankings, fetch the needed working set first and compute locally.
 - If the result is partial, use top-level keys `results` and `coverage`.
 await hf_collections_search(query: 'str | None' = None, owner: 'str | None' = None, limit: 'int' = 20, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
+await hf_daily_papers(date: 'str | None' = None, week: 'str | None' = None, month: 'str | None' = None, submitter: 'str | None' = None, sort: 'str | None' = None, p: 'int | None' = None, limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
 await hf_datasets_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, benchmark: 'str | bool | None' = None, dataset_name: 'str | None' = None, gated: 'bool | None' = None, language_creators: 'str | list[str] | None' = None, language: 'str | list[str] | None' = None, multilinguality: 'str | list[str] | None' = None, size_categories: 'str | list[str] | None' = None, task_categories: 'str | list[str] | None' = None, task_ids: 'str | list[str] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
 await hf_org_members(organization: 'str', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
+await hf_paper_info(paper_id: 'str', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
+await hf_papers_search(query: 'str', limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
 await hf_profile_summary(handle: 'str | None' = None, include: 'list[str] | None' = None, likes_limit: 'int' = 10, activity_limit: 'int' = 10) -> 'dict[str, Any]'
+await hf_read_paper(paper_id: 'str') -> 'dict[str, Any]'
 await hf_recent_activity(feed_type: 'str | None' = None, entity: 'str | None' = None, activity_types: 'list[str] | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, max_pages: 'int | None' = None, start_cursor: 'str | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
 await hf_repo_details(repo_id: 'str | None' = None, repo_ids: 'list[str] | None' = None, repo_type: 'str' = 'auto', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
 ### hf_daily_papers
 - category: `curated_feed`
+- backed_by: `HfApi.list_daily_papers`
 - returns:
   - envelope: `{ok, item, items, meta, error}`
+  - row_type: `paper`
+  - default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
+  - guaranteed_fields: `paper_id`, `title`, `published_at`
+  - optional_fields: `summary`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
+- supported_params: `date`, `week`, `month`, `submitter`, `sort`, `p`, `limit`, `where`, `fields`
+- param_values:
+  - sort: `published_at`, `trending`
 - fields_contract:
+  - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
   - canonical_only: `true`
 - where_contract:
+  - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
   - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
   - normalized_only: `true`
 - limit_contract:
   - default_limit: `20`
   - max_limit: `500`
+- notes: Curated daily papers feed backed by HfApi.list_daily_papers. Useful join points: organization, submitted_by, author_usernames, discussion_id.
 ### hf_datasets_search
   - scan_max: `10000`
 - notes: Returns organization member summary rows.
+### hf_paper_info
+- category: `paper_detail`
+- backed_by: `HfApi.paper_info`
+- returns:
+  - envelope: `{ok, item, items, meta, error}`
+  - row_type: `paper`
+  - default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
+  - guaranteed_fields: `paper_id`, `title`, `published_at`
+  - optional_fields: `summary`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
+- supported_params: `paper_id`, `fields`
+- fields_contract:
+  - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
+  - canonical_only: `true`
+- notes: Exact paper metadata helper backed by HfApi.paper_info.
+### hf_papers_search
+- category: `paper_search`
+- backed_by: `HfApi.list_papers`
+- returns:
+  - envelope: `{ok, item, items, meta, error}`
+  - row_type: `paper`
+  - default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
+  - guaranteed_fields: `paper_id`, `title`, `published_at`
+  - optional_fields: `summary`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
+- supported_params: `query`, `limit`, `where`, `fields`
+- fields_contract:
+  - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
+  - canonical_only: `true`
+- where_contract:
+  - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
+  - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
+  - normalized_only: `true`
+- limit_contract:
+  - default_limit: `20`
+  - max_limit: `500`
+- notes: Paper search helper backed by HfApi.list_papers. Use organization, submitted_by, and author_usernames as the main Hub-native join points.
 ### hf_profile_summary
 - category: `profile_summary`
   - include: `likes`, `activity`
 - notes: Profile summary helper. Aggregate counts like followers_count/following_count are in the base item. include=['likes', 'activity'] adds composed samples and extra upstream work; no other include values are supported. Overview-owned repo counts may differ slightly from visible public search/list results.
+### hf_read_paper
+- category: `paper_markdown`
+- backed_by: `HfApi.read_paper`
+- returns:
+  - envelope: `{ok, item, items, meta, error}`
+  - row_type: `paper_content`
+  - default_fields: `paper_id`, `content`
+  - guaranteed_fields: `paper_id`, `content`
+  - optional_fields: []
+- supported_params: `paper_id`
+- fields_contract:
+  - allowed_fields: `paper_id`, `content`
+  - canonical_only: `true`
+- notes: Returns paper markdown content backed by HfApi.read_paper.
 ### hf_recent_activity
 - category: `activity_feed`

hf-hub-query.md CHANGED Viewed

@@ -1,19 +1,22 @@
 ---
 type: agent
 name: hf_hub_query
-model: gpt-oss
 use_history: false
 default: true
 description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound limit, scan_limit, max_pages, and ranking_window for brevity or broader coverage, and the tool can also be asked about its supported helpers, canonical fields, defaults, and coverage behavior."
 shell: false
 skills: []
 function_tools:
-  - tool_entrypoints.py:hf_hub_query_raw
 request_params:
   tool_result_mode: passthrough
 ---
-reasoning: high
 You are a **tool-using, read-only** Hugging Face Hub search/navigation agent.
 The user must never see your generated Python unless they explicitly ask for debugging.
@@ -23,18 +26,23 @@ The user must never see your generated Python unless they explicitly ask for deb
 - Put the generated Python only in the tool's `code` argument.
 - Do **not** output planning text, pseudocode, code fences, or contract explanations before the tool call.
 - Only ask a brief clarification question if the request is genuinely ambiguous or missing required identity.
-- The generated program must define `async def solve(query, max_calls): ...` and end with `await solve(query, max_calls)`.
-- Use the original user request, or a tight restatement, as the tool `query`.
 - Do **not** pass explicit `max_calls` or `timeout_sec` tool arguments unless the user explicitly asked for a non-default budget/timeout. Let the runtime defaults apply for ordinary requests.
 - One user request = one `hf_hub_query_raw` call. Do **not** retry in the same turn.
 ## Raw return rules
-- The return value of `solve(...)` is the user-facing payload.
-- Return a dict/list when JSON is appropriate; return a string/number/bool only when that scalar is the intended payload.
 - For composed structured outputs that include your own coverage metadata, always use the exact top-level keys `results` and `coverage` unless the user explicitly asked for different key names.
-- Prefer returning outputs directly unless post-processing is required. Do **NOT** rename fields unless asked specifically.
-- Runtime will wrap the `solve(...)` return value under `result` and attach runtime information under `meta`.
 - When helper-owned coverage metadata matters, prefer returning the helper envelope directly.
-- Do **not** create your own transport wrapper such as `{result: ..., meta: ...}` inside `solve(...)`.
 {{file:_monty_codegen_shared.md}}

 ---
 type: agent
 name: hf_hub_query
+model: hf.openai/gpt-oss-120b:sambanova
 use_history: false
 default: true
 description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound limit, scan_limit, max_pages, and ranking_window for brevity or broader coverage, and the tool can also be asked about its supported helpers, canonical fields, defaults, and coverage behavior."
 shell: false
 skills: []
 function_tools:
+  - entrypoint: tool_entrypoints.py:hf_hub_query_raw
+    variant: code
+    code_arg: code
+    language: python
 request_params:
   tool_result_mode: passthrough
 ---
+reasoning: medium
 You are a **tool-using, read-only** Hugging Face Hub search/navigation agent.
 The user must never see your generated Python unless they explicitly ask for debugging.
 - Put the generated Python only in the tool's `code` argument.
 - Do **not** output planning text, pseudocode, code fences, or contract explanations before the tool call.
 - Only ask a brief clarification question if the request is genuinely ambiguous or missing required identity.
+- The generated program is a top-level Monty Python script.
+- `max_calls` is provided by the runtime as a top-level input.
+- Always assign the final output to `result`.
+- The final line must be exactly `result`.
+- Never stop after `result = ...`; always add a final bare `result` line.
+- Do **not** define or call `solve(...)`.
+- The tool call only needs `code` unless you truly need optional raw-query metadata.
 - Do **not** pass explicit `max_calls` or `timeout_sec` tool arguments unless the user explicitly asked for a non-default budget/timeout. Let the runtime defaults apply for ordinary requests.
 - One user request = one `hf_hub_query_raw` call. Do **not** retry in the same turn.
 ## Raw return rules
+- The value of `result` is the user-facing payload.
+- Make `result` a dict/list when JSON is appropriate; use a string/number/bool only when that scalar is the intended payload.
 - For composed structured outputs that include your own coverage metadata, always use the exact top-level keys `results` and `coverage` unless the user explicitly asked for different key names.
+- Prefer emitting outputs directly unless post-processing is required. Do **NOT** rename fields unless asked specifically.
+- Runtime will wrap the value of `result` under `result` and attach runtime information under `meta`.
 - When helper-owned coverage metadata matters, prefer returning the helper envelope directly.
+- Do **not** create your own transport wrapper such as `{result: ..., meta: ...}` in generated code.
 {{file:_monty_codegen_shared.md}}

monty_api/__pycache__/__init__.cpython-313.pyc DELETED Viewed

Binary file (741 Bytes)

monty_api/__pycache__/__init__.cpython-314.pyc DELETED Viewed

Binary file (371 Bytes)

monty_api/__pycache__/aliases.cpython-313.pyc DELETED Viewed

Binary file (901 Bytes)

monty_api/__pycache__/aliases.cpython-314.pyc DELETED Viewed

Binary file (828 Bytes)

monty_api/__pycache__/constants.cpython-313.pyc DELETED Viewed

Binary file (2.99 kB)

monty_api/__pycache__/constants.cpython-314.pyc DELETED Viewed

Binary file (2.97 kB)

monty_api/__pycache__/context_types.cpython-313.pyc DELETED Viewed

Binary file (1.34 kB)

monty_api/__pycache__/context_types.cpython-314.pyc DELETED Viewed

Binary file (1.6 kB)

monty_api/__pycache__/helper_contracts.cpython-313.pyc DELETED Viewed

Binary file (20.8 kB)

monty_api/__pycache__/helper_contracts.cpython-314.pyc DELETED Viewed

Binary file (23.5 kB)

monty_api/__pycache__/http_runtime.cpython-313.pyc DELETED Viewed

Binary file (28.5 kB)

monty_api/__pycache__/http_runtime.cpython-314.pyc DELETED Viewed

Binary file (33.2 kB)

monty_api/__pycache__/query_entrypoints.cpython-313.pyc DELETED Viewed

Binary file (15.5 kB)

monty_api/__pycache__/query_entrypoints.cpython-314.pyc DELETED Viewed

Binary file (17.9 kB)

monty_api/__pycache__/registry.cpython-313.pyc DELETED Viewed

Binary file (14.5 kB)

monty_api/__pycache__/registry.cpython-314.pyc DELETED Viewed

Binary file (15.6 kB)

monty_api/__pycache__/runtime_context.cpython-313.pyc DELETED Viewed

Binary file (12 kB)

monty_api/__pycache__/runtime_context.cpython-314.pyc DELETED Viewed

Binary file (13.5 kB)

monty_api/__pycache__/runtime_envelopes.cpython-313.pyc DELETED Viewed

Binary file (10.2 kB)

monty_api/__pycache__/runtime_envelopes.cpython-314.pyc DELETED Viewed

Binary file (12 kB)

monty_api/__pycache__/runtime_filtering.cpython-313.pyc DELETED Viewed

Binary file (8.69 kB)

monty_api/__pycache__/runtime_filtering.cpython-314.pyc DELETED Viewed

Binary file (10.6 kB)

monty_api/__pycache__/tool_entrypoints.cpython-313.pyc DELETED Viewed

Binary file (1.79 kB)

monty_api/__pycache__/tool_entrypoints.cpython-314.pyc DELETED Viewed

Binary file (1.98 kB)

monty_api/__pycache__/validation.cpython-313.pyc DELETED Viewed

Binary file (16.2 kB)

monty_api/__pycache__/validation.cpython-314.pyc DELETED Viewed

Binary file (18.6 kB)

monty_api/constants.py CHANGED Viewed

@@ -183,22 +183,24 @@ COLLECTION_CANONICAL_FIELDS: tuple[str, ...] = (
     "item_count",
 )
-DAILY_PAPER_CANONICAL_FIELDS: tuple[str, ...] = (
     "paper_id",
     "title",
     "summary",
     "published_at",
-    "submitted_on_daily_at",
     "authors",
     "organization",
     "submitted_by",
     "discussion_id",
     "upvotes",
-    "github_repo_url",
     "github_stars",
-    "project_page_url",
-    "num_comments",
-    "is_author_participating",
-    "repo_id",
     "rank",
 )

     "item_count",
 )
+PAPER_CANONICAL_FIELDS: tuple[str, ...] = (
     "paper_id",
     "title",
     "summary",
     "published_at",
+    "submitted_at",
     "authors",
+    "author_usernames",
     "organization",
     "submitted_by",
     "discussion_id",
     "upvotes",
+    "source",
+    "comments",
+    "project_page",
+    "github_repo",
     "github_stars",
     "rank",
 )
+PAPER_CONTENT_FIELDS: tuple[str, ...] = ("paper_id", "content")

monty_api/helper_contracts.py CHANGED Viewed

@@ -16,9 +16,10 @@ from .constants import (
     ACTIVITY_CANONICAL_FIELDS,
     ACTOR_CANONICAL_FIELDS,
     COLLECTION_CANONICAL_FIELDS,
-    DAILY_PAPER_CANONICAL_FIELDS,
     DISCUSSION_CANONICAL_FIELDS,
     DISCUSSION_DETAIL_CANONICAL_FIELDS,
     PROFILE_CANONICAL_FIELDS,
     REPO_CANONICAL_FIELDS,
     USER_CANONICAL_FIELDS,
@@ -76,9 +77,10 @@ FIELD_GROUPS: dict[str, list[str]] = {
     "activity": list(ACTIVITY_CANONICAL_FIELDS),
     "actor": list(ACTOR_CANONICAL_FIELDS),
     "collection": list(COLLECTION_CANONICAL_FIELDS),
-    "daily_paper": list(DAILY_PAPER_CANONICAL_FIELDS),
     "discussion": list(DISCUSSION_CANONICAL_FIELDS),
     "discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
     "profile": list(PROFILE_CANONICAL_FIELDS),
     "repo": list(REPO_CANONICAL_FIELDS),
     "trending_repo": list(TRENDING_CANONICAL_FIELDS),
@@ -109,10 +111,12 @@ HELPER_CONTRACT_SPECS: dict[str, dict[str, Any]] = {
     },
     "hf_daily_papers": {
         "category": "curated_feed",
-        "row_type": "daily_paper",
-        "fields_group": "daily_paper",
         "filter_param": "where",
-        "filter_group": "daily_paper",
     },
     "hf_datasets_search": {
         "category": "wrapped_hf_repo_search",
@@ -142,6 +146,20 @@ HELPER_CONTRACT_SPECS: dict[str, dict[str, Any]] = {
         "row_type": "profile",
         "param_values": {"include": ["likes", "activity"]},
     },
     "hf_recent_activity": {
         "category": "activity_feed",
         "row_type": "activity",
@@ -189,6 +207,12 @@ HELPER_CONTRACT_SPECS: dict[str, dict[str, Any]] = {
         "row_type": "runtime_capability",
         "param_values": {"section": list(RUNTIME_CAPABILITY_SECTION_VALUES)},
     },
     "hf_spaces_search": {
         "category": "wrapped_hf_repo_search",
         "row_type": "repo",
@@ -396,6 +420,9 @@ def build_helper_contracts(
         param_values = _param_values_for_helper(helper_name)
         if param_values is not None:
             contract["param_values"] = param_values
         upstream_repo_type = spec.get("upstream_repo_type")
         if isinstance(upstream_repo_type, str):

     ACTIVITY_CANONICAL_FIELDS,
     ACTOR_CANONICAL_FIELDS,
     COLLECTION_CANONICAL_FIELDS,
     DISCUSSION_CANONICAL_FIELDS,
     DISCUSSION_DETAIL_CANONICAL_FIELDS,
+    PAPER_CANONICAL_FIELDS,
+    PAPER_CONTENT_FIELDS,
     PROFILE_CANONICAL_FIELDS,
     REPO_CANONICAL_FIELDS,
     USER_CANONICAL_FIELDS,
     "activity": list(ACTIVITY_CANONICAL_FIELDS),
     "actor": list(ACTOR_CANONICAL_FIELDS),
     "collection": list(COLLECTION_CANONICAL_FIELDS),
     "discussion": list(DISCUSSION_CANONICAL_FIELDS),
     "discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
+    "paper": list(PAPER_CANONICAL_FIELDS),
+    "paper_content": list(PAPER_CONTENT_FIELDS),
     "profile": list(PROFILE_CANONICAL_FIELDS),
     "repo": list(REPO_CANONICAL_FIELDS),
     "trending_repo": list(TRENDING_CANONICAL_FIELDS),
     },
     "hf_daily_papers": {
         "category": "curated_feed",
+        "row_type": "paper",
+        "fields_group": "paper",
         "filter_param": "where",
+        "filter_group": "paper",
+        "param_values": {"sort": ["published_at", "trending"]},
+        "backed_by": "HfApi.list_daily_papers",
     },
     "hf_datasets_search": {
         "category": "wrapped_hf_repo_search",
         "row_type": "profile",
         "param_values": {"include": ["likes", "activity"]},
     },
+    "hf_paper_info": {
+        "category": "paper_detail",
+        "row_type": "paper",
+        "fields_group": "paper",
+        "backed_by": "HfApi.paper_info",
+    },
+    "hf_papers_search": {
+        "category": "paper_search",
+        "row_type": "paper",
+        "fields_group": "paper",
+        "filter_param": "where",
+        "filter_group": "paper",
+        "backed_by": "HfApi.list_papers",
+    },
     "hf_recent_activity": {
         "category": "activity_feed",
         "row_type": "activity",
         "row_type": "runtime_capability",
         "param_values": {"section": list(RUNTIME_CAPABILITY_SECTION_VALUES)},
     },
+    "hf_read_paper": {
+        "category": "paper_markdown",
+        "row_type": "paper_content",
+        "fields_group": "paper_content",
+        "backed_by": "HfApi.read_paper",
+    },
     "hf_spaces_search": {
         "category": "wrapped_hf_repo_search",
         "row_type": "repo",
         param_values = _param_values_for_helper(helper_name)
         if param_values is not None:
             contract["param_values"] = param_values
+        backed_by = spec.get("backed_by")
+        if isinstance(backed_by, str):
+            contract["backed_by"] = backed_by
         upstream_repo_type = spec.get("upstream_repo_type")
         if isinstance(upstream_repo_type, str):

monty_api/helpers/__init__.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from .activity import register_activity_helpers
 from .collections import register_collection_helpers
 from .introspection import register_introspection_helpers
 from .profiles import register_profile_helpers
 from .repos import register_repo_helpers
@@ -8,6 +9,7 @@ __all__ = [
     "register_activity_helpers",
     "register_collection_helpers",
     "register_introspection_helpers",
     "register_profile_helpers",
     "register_repo_helpers",
 ]

 from .activity import register_activity_helpers
 from .collections import register_collection_helpers
 from .introspection import register_introspection_helpers
+from .papers import register_paper_helpers
 from .profiles import register_profile_helpers
 from .repos import register_repo_helpers
     "register_activity_helpers",
     "register_collection_helpers",
     "register_introspection_helpers",
+    "register_paper_helpers",
     "register_profile_helpers",
     "register_repo_helpers",
 ]

monty_api/helpers/__pycache__/__init__.cpython-313.pyc DELETED Viewed

Binary file (487 Bytes)

monty_api/helpers/__pycache__/__init__.cpython-314.pyc DELETED Viewed

Binary file (489 Bytes)

monty_api/helpers/__pycache__/activity.cpython-313.pyc DELETED Viewed

Binary file (8.71 kB)

monty_api/helpers/__pycache__/activity.cpython-314.pyc DELETED Viewed

Binary file (9.3 kB)

monty_api/helpers/__pycache__/collections.cpython-313.pyc DELETED Viewed

Binary file (12.7 kB)

monty_api/helpers/__pycache__/collections.cpython-314.pyc DELETED Viewed

Binary file (13.8 kB)

monty_api/helpers/__pycache__/common.cpython-313.pyc DELETED Viewed

Binary file (1.5 kB)

monty_api/helpers/__pycache__/common.cpython-314.pyc DELETED Viewed

Binary file (1.64 kB)

monty_api/helpers/__pycache__/introspection.cpython-313.pyc DELETED Viewed

Binary file (11.1 kB)

monty_api/helpers/__pycache__/introspection.cpython-314.pyc DELETED Viewed

Binary file (12.4 kB)

monty_api/helpers/__pycache__/profiles.cpython-313.pyc DELETED Viewed

Binary file (32.7 kB)

monty_api/helpers/__pycache__/profiles.cpython-314.pyc DELETED Viewed

Binary file (35.3 kB)

monty_api/helpers/__pycache__/repos.cpython-313.pyc DELETED Viewed

Binary file (49.3 kB)

monty_api/helpers/__pycache__/repos.cpython-314.pyc DELETED Viewed

Binary file (53.2 kB)

monty_api/helpers/introspection.py CHANGED Viewed

@@ -10,7 +10,6 @@ from ..constants import (
     ACTIVITY_CANONICAL_FIELDS,
     ACTOR_CANONICAL_FIELDS,
     COLLECTION_CANONICAL_FIELDS,
-    DAILY_PAPER_CANONICAL_FIELDS,
     DISCUSSION_CANONICAL_FIELDS,
     DISCUSSION_DETAIL_CANONICAL_FIELDS,
     DEFAULT_MAX_CALLS,
@@ -19,6 +18,8 @@ from ..constants import (
     LIKES_SCAN_LIMIT_CAP,
     MAX_CALLS_LIMIT,
     OUTPUT_ITEMS_TRUNCATION_LIMIT,
     PROFILE_CANONICAL_FIELDS,
     RECENT_ACTIVITY_SCAN_MAX_PAGES,
     REPO_CANONICAL_FIELDS,
@@ -140,7 +141,8 @@ async def hf_runtime_capabilities(
             "user_likes": list(USER_LIKES_CANONICAL_FIELDS),
             "activity": list(ACTIVITY_CANONICAL_FIELDS),
             "collection": list(COLLECTION_CANONICAL_FIELDS),
-            "daily_paper": list(DAILY_PAPER_CANONICAL_FIELDS),
             "discussion": list(DISCUSSION_CANONICAL_FIELDS),
             "discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
         },

     ACTIVITY_CANONICAL_FIELDS,
     ACTOR_CANONICAL_FIELDS,
     COLLECTION_CANONICAL_FIELDS,
     DISCUSSION_CANONICAL_FIELDS,
     DISCUSSION_DETAIL_CANONICAL_FIELDS,
     DEFAULT_MAX_CALLS,
     LIKES_SCAN_LIMIT_CAP,
     MAX_CALLS_LIMIT,
     OUTPUT_ITEMS_TRUNCATION_LIMIT,
+    PAPER_CANONICAL_FIELDS,
+    PAPER_CONTENT_FIELDS,
     PROFILE_CANONICAL_FIELDS,
     RECENT_ACTIVITY_SCAN_MAX_PAGES,
     REPO_CANONICAL_FIELDS,
             "user_likes": list(USER_LIKES_CANONICAL_FIELDS),
             "activity": list(ACTIVITY_CANONICAL_FIELDS),
             "collection": list(COLLECTION_CANONICAL_FIELDS),
+            "paper": list(PAPER_CANONICAL_FIELDS),
+            "paper_content": list(PAPER_CONTENT_FIELDS),
             "discussion": list(DISCUSSION_CANONICAL_FIELDS),
             "discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
         },

monty_api/helpers/papers.py ADDED Viewed

	@@ -0,0 +1,318 @@

+from __future__ import annotations
+from functools import partial
+from typing import Any, Callable
+from ..constants import OUTPUT_ITEMS_TRUNCATION_LIMIT, PAPER_CANONICAL_FIELDS
+from ..context_types import HelperRuntimeContext
def _extract_author_usernames(authors: list[Any] | None) -> list[str] | None:
    """Collect the distinct user handles attached to a paper's author entries.

    For each author, the linked ``user`` object (if any) is probed for the
    attributes ``username``, ``user`` and ``name`` in that order, and the
    first non-blank string is taken as that author's handle.

    Args:
        authors: Author entries of a paper. Anything that is not a list is
            treated as "no authors".

    Returns:
        The handles in first-seen order without duplicates, or ``None`` when
        ``authors`` is not a list or no handle could be found.
    """
    if not isinstance(authors, list):
        return None
    usernames: list[str] = []
    for author in authors:
        user = getattr(author, "user", None)
        for attr in ("username", "user", "name"):
            candidate = getattr(user, attr, None)
            if not isinstance(candidate, str):
                continue
            cleaned = candidate.strip()
            if not cleaned:
                continue
            if cleaned not in usernames:
                usernames.append(cleaned)
            # Stop at the first usable handle even when it is a duplicate;
            # otherwise a repeated author would fall through to a
            # lower-priority attribute and contribute a second entry.
            break
    return usernames or None
def _normalize_paper_sort(sort: str | None) -> tuple[str | None, str | None]:
    """Translate a caller-supplied sort key into its canonical value.

    Args:
        sort: Requested sort key; ``None`` or a blank string means
            "no sort requested".

    Returns:
        A ``(sort, error)`` pair: ``(None, None)`` when no sort was requested,
        ``(value, None)`` for an accepted key, and ``(None, message)`` for an
        unsupported one.
    """
    requested = str(sort or "").strip()
    if requested == "":
        return (None, None)
    # Both the snake_case and the camelCase spelling are accepted on input.
    if requested in ("published_at", "publishedAt"):
        return ("publishedAt", None)
    if requested == "trending":
        return ("trending", None)
    return (None, "sort must be one of published_at, publishedAt, trending")
def _normalize_paper_info(
    ctx: HelperRuntimeContext,
    paper: Any,
    *,
    rank: int | None = None,
) -> dict[str, Any]:
    """Flatten a paper object into a plain row dict.

    Every value is read with ``getattr(..., None)``, so attributes missing on
    ``paper`` show up as ``None`` (or whatever the ``ctx`` converter returns
    for ``None``) instead of raising.

    Args:
        ctx: Runtime context supplying the date, integer, author-name and
            profile-name converters.
        paper: Paper object to read attributes from.
        rank: Optional position of the paper in an ordered listing.

    Returns:
        The row dict, including ``ai_summary`` / ``ai_keywords`` and ``rank``.
    """

    def read(name: str) -> Any:
        return getattr(paper, name, None)

    author_entries = read("authors")
    org_profile = read("organization")
    submitter_profile = read("submitted_by")
    return {
        "paper_id": read("id"),
        "title": read("title"),
        "summary": read("summary"),
        "published_at": ctx._dt_to_str(read("published_at")),
        "submitted_at": ctx._dt_to_str(read("submitted_at")),
        "authors": ctx._extract_author_names(author_entries),
        "author_usernames": _extract_author_usernames(author_entries),
        "organization": ctx._extract_profile_name(org_profile),
        "submitted_by": ctx._extract_profile_name(submitter_profile),
        "discussion_id": read("discussion_id"),
        "upvotes": ctx._as_int(read("upvotes")),
        "source": read("source"),
        "comments": ctx._as_int(read("comments")),
        "project_page": read("project_page"),
        "github_repo": read("github_repo"),
        "github_stars": ctx._as_int(read("github_stars")),
        "ai_summary": read("ai_summary"),
        "ai_keywords": read("ai_keywords"),
        "rank": rank,
    }
async def _run_paper_list_helper(
    ctx: HelperRuntimeContext,
    *,
    helper_name: str,
    source: str,
    loader: Callable[[Any, int], list[Any]],
    limit: int,
    where: dict[str, Any] | None,
    fields: list[str] | None,
    ordered_ranking: bool = False,
    **meta: Any,
) -> dict[str, Any]:
    """Shared pipeline for the list-style paper helpers.

    Steps: clamp ``limit`` against the helper's policy, load papers through
    ``loader``, normalize each one, apply the ``where`` filter, project onto
    ``fields``, and wrap everything in the ``ctx`` success envelope.

    Args:
        ctx: Runtime context providing policy lookup, the API client, the
            host-call wrapper, filtering/projection and envelope builders.
        helper_name: Name used to look up ``default_limit`` / ``max_limit``.
        source: Endpoint label reported in the envelope and passed to the
            host-call wrapper.
        loader: Callable receiving ``(api, applied_limit)`` and returning the
            raw paper list.
        limit: Caller-requested row limit.
        where: Optional filter passed to ``ctx._apply_where``.
        fields: Optional projection passed to ``ctx._project_items``.
        ordered_ranking: When true, rows get a 1-based ``rank``.
        **meta: Extra key/values forwarded verbatim to the success envelope.

    Returns:
        The success envelope, or an error envelope when loading fails or when
        filtering/projection raises ``ValueError``.
    """
    calls_before = ctx.call_count["n"]
    fallback_limit = ctx._policy_int(helper_name, "default_limit", 20)
    ceiling = ctx._policy_int(
        helper_name, "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
    )
    applied_limit = ctx._clamp_int(
        limit,
        default=fallback_limit,
        minimum=1,
        maximum=ceiling,
    )
    limit_meta = ctx._derive_limit_metadata(
        requested_limit=limit,
        applied_limit=applied_limit,
        default_limit_used=limit == fallback_limit,
    )
    api = ctx._get_hf_api_client()

    def fail(exc: Exception) -> dict[str, Any]:
        return ctx._helper_error(start_calls=calls_before, source=source, error=exc)

    try:
        payload = ctx._host_hf_call(
            source,
            lambda: loader(api, applied_limit),
        )
    except Exception as exc:
        return fail(exc)

    rows: list[dict[str, Any]] = []
    for position, paper in enumerate(payload[:applied_limit], start=1):
        row_rank = position if ordered_ranking else None
        rows.append(_normalize_paper_info(ctx, paper, rank=row_rank))

    try:
        rows = ctx._apply_where(rows, where, allowed_fields=PAPER_CANONICAL_FIELDS)
    except ValueError as exc:
        return fail(exc)
    # Count matches before projection trims or reshapes the rows.
    matched = len(rows)
    try:
        rows = ctx._project_items(
            rows[:applied_limit],
            fields,
            allowed_fields=PAPER_CANONICAL_FIELDS,
        )
    except ValueError as exc:
        return fail(exc)

    # A full page means there may be more rows than were requested.
    limit_boundary_hit = len(payload) >= applied_limit
    next_request_hint = (
        f"Increase limit above {applied_limit} to check whether more rows exist"
        if limit_boundary_hit
        else None
    )
    return ctx._helper_success(
        start_calls=calls_before,
        source=source,
        items=rows,
        limit=applied_limit,
        scanned=len(payload),
        matched=matched,
        returned=len(rows),
        ordered_ranking=ordered_ranking,
        more_available="unknown" if limit_boundary_hit else False,
        limit_boundary_hit=limit_boundary_hit,
        next_request_hint=next_request_hint,
        **limit_meta,
        **meta,
    )
async def hf_daily_papers(
    ctx: HelperRuntimeContext,
    date: str | None = None,
    week: str | None = None,
    month: str | None = None,
    submitter: str | None = None,
    sort: str | None = None,
    p: int | None = None,
    limit: int = 20,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """List daily papers via ``api.list_daily_papers`` as ranked rows.

    ``sort`` is validated first; an unsupported value yields an error envelope
    without calling the API. The remaining selectors (``date``, ``week``,
    ``month``, ``submitter``, ``p``) are forwarded unchanged and also echoed
    into the envelope metadata.

    Returns:
        The envelope produced by the shared paper-list pipeline, with
        ``ordered_ranking`` enabled.
    """
    endpoint = "/api/daily_papers"
    normalized_sort, sort_error = _normalize_paper_sort(sort)
    if sort_error:
        return ctx._helper_error(
            start_calls=ctx.call_count["n"],
            source=endpoint,
            error=sort_error,
        )

    def load(api: Any, applied_limit: int) -> list[Any]:
        papers = api.list_daily_papers(
            date=date,
            week=week,
            month=month,
            submitter=submitter,
            sort=normalized_sort,
            p=p,
            limit=applied_limit,
        )
        return list(papers)

    return await _run_paper_list_helper(
        ctx,
        helper_name="hf_daily_papers",
        source=endpoint,
        loader=load,
        limit=limit,
        where=where,
        fields=fields,
        ordered_ranking=True,
        date=date,
        week=week,
        month=month,
        submitter=submitter,
        sort=normalized_sort,
        p=p,
    )
async def hf_papers_search(
    ctx: HelperRuntimeContext,
    query: str,
    limit: int = 20,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Search papers via ``api.list_papers`` for a free-text query.

    The query is stripped of surrounding whitespace; an empty query yields an
    error envelope without calling the API. The stripped query is echoed into
    the envelope metadata.

    Returns:
        The envelope produced by the shared paper-list pipeline.
    """
    endpoint = "/api/papers/search"
    term = str(query or "").strip()
    if term == "":
        return ctx._helper_error(
            start_calls=ctx.call_count["n"],
            source=endpoint,
            error="query is required",
        )

    def load(api: Any, applied_limit: int) -> list[Any]:
        return list(api.list_papers(query=term, limit=applied_limit))

    return await _run_paper_list_helper(
        ctx,
        helper_name="hf_papers_search",
        source=endpoint,
        loader=load,
        limit=limit,
        where=where,
        fields=fields,
        query=term,
    )
async def hf_paper_info(
    ctx: HelperRuntimeContext,
    paper_id: str,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Fetch one paper via ``api.paper_info`` and return it as a single row.

    Args:
        ctx: Runtime context providing the API client, host-call wrapper,
            projection and envelope builders.
        paper_id: Paper identifier; surrounding whitespace is ignored.
        fields: Optional projection passed to ``ctx._project_items``.

    Returns:
        A success envelope with one item, or an error envelope when
        ``paper_id`` is blank, the lookup raises, or projection raises
        ``ValueError``.
    """
    calls_before = ctx.call_count["n"]
    pid = str(paper_id or "").strip()
    if pid == "":
        return ctx._helper_error(
            start_calls=calls_before,
            source="/api/papers/<paper_id>",
            error="paper_id is required",
        )

    endpoint = f"/api/papers/{pid}"

    def fail(exc: Exception) -> dict[str, Any]:
        return ctx._helper_error(
            start_calls=calls_before,
            source=endpoint,
            error=exc,
            paper_id=pid,
        )

    try:
        paper = ctx._host_hf_call(
            endpoint,
            lambda: ctx._get_hf_api_client().paper_info(id=pid),
        )
    except Exception as exc:
        return fail(exc)

    rows = [_normalize_paper_info(ctx, paper)]
    try:
        rows = ctx._project_items(rows, fields, allowed_fields=PAPER_CANONICAL_FIELDS)
    except ValueError as exc:
        return fail(exc)

    row_count = len(rows)
    return ctx._helper_success(
        start_calls=calls_before,
        source=endpoint,
        items=rows,
        paper_id=pid,
        returned=row_count,
        matched=row_count,
    )
async def hf_read_paper(
    ctx: HelperRuntimeContext,
    paper_id: str,
) -> dict[str, Any]:
    """Fetch a paper's content via ``api.read_paper`` as a one-row envelope.

    Args:
        ctx: Runtime context providing the API client, host-call wrapper and
            envelope builders.
        paper_id: Paper identifier; surrounding whitespace is ignored.

    Returns:
        A success envelope whose single item holds ``paper_id`` and
        ``content``, or an error envelope when ``paper_id`` is blank or the
        read raises.
    """
    calls_before = ctx.call_count["n"]
    pid = str(paper_id or "").strip()
    if pid == "":
        return ctx._helper_error(
            start_calls=calls_before,
            source="/papers/<paper_id>.md",
            error="paper_id is required",
        )

    endpoint = f"/papers/{pid}.md"
    try:
        content = ctx._host_hf_call(
            endpoint,
            lambda: ctx._get_hf_api_client().read_paper(id=pid),
        )
    except Exception as exc:
        return ctx._helper_error(
            start_calls=calls_before,
            source=endpoint,
            error=exc,
            paper_id=pid,
        )

    row = {"paper_id": pid, "content": content}
    return ctx._helper_success(
        start_calls=calls_before,
        source=endpoint,
        items=[row],
        paper_id=pid,
        returned=1,
        matched=1,
    )
def register_paper_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[..., Any]]:
    """Bind every paper helper to ``ctx`` and return them keyed by helper name."""
    registry: dict[str, Callable[..., Any]] = {}
    for name, helper in (
        ("hf_daily_papers", hf_daily_papers),
        ("hf_papers_search", hf_papers_search),
        ("hf_paper_info", hf_paper_info),
        ("hf_read_paper", hf_read_paper),
    ):
        # partial pre-fills ctx so callers only pass the helper's own arguments.
        registry[name] = partial(helper, ctx)
    return registry

monty_api/helpers/profiles.py CHANGED Viewed

@@ -338,8 +338,8 @@ async def hf_org_members(
     )
     sample_complete = (
         exact_count
-        and len(normalized) <= applied_limit
-        and (not count_only or len(normalized) == 0)
     )
     more_available = ctx._derive_more_available(
         sample_complete=sample_complete,
@@ -372,13 +372,18 @@ async def hf_org_members(
             "organization": org,
         },
         limit_plan=limit_plan,
-        matched_count=len(normalized),
         returned_count=len(items),
         exact_count=exact_count,
         count_only=count_only,
         sample_complete=sample_complete,
         more_available=more_available,
-        scan_limit_hit=scan_limit_hit,
     )
     return ctx._helper_success(
         start_calls=start_calls, source=endpoint, items=items, meta=meta
@@ -573,8 +578,8 @@ async def _user_graph_helper(
     )
     sample_complete = (
         exact_count
-        and len(normalized) <= applied_limit
-        and (not count_only or len(normalized) == 0)
     )
     more_available = ctx._derive_more_available(
         sample_complete=sample_complete,
@@ -617,13 +622,18 @@ async def _user_graph_helper(
             "organization": u if entity_type == "organization" else None,
         },
         limit_plan=limit_plan,
-        matched_count=len(normalized),
         returned_count=len(items),
         exact_count=exact_count,
         count_only=count_only,
         sample_complete=sample_complete,
         more_available=more_available,
-        scan_limit_hit=scan_limit_hit,
     )
     return ctx._helper_success(
         start_calls=start_calls, source=endpoint, items=items, meta=meta

     )
     sample_complete = (
         exact_count
+        and total_matched <= applied_limit
+        and (not count_only or total_matched == 0)
     )
     more_available = ctx._derive_more_available(
         sample_complete=sample_complete,
             "organization": org,
         },
         limit_plan=limit_plan,
+        matched_count=total_matched,
         returned_count=len(items),
         exact_count=exact_count,
         count_only=count_only,
         sample_complete=sample_complete,
         more_available=more_available,
+        scan_limit_hit=scan_limit_hit
+        or (
+            overview_total is not None
+            and overview_total > observed_total
+            and observed_total >= scan_lim
+        ),
     )
     return ctx._helper_success(
         start_calls=start_calls, source=endpoint, items=items, meta=meta
     )
     sample_complete = (
         exact_count
+        and total_matched <= applied_limit
+        and (not count_only or total_matched == 0)
     )
     more_available = ctx._derive_more_available(
         sample_complete=sample_complete,
             "organization": u if entity_type == "organization" else None,
         },
         limit_plan=limit_plan,
+        matched_count=total_matched,
         returned_count=len(items),
         exact_count=exact_count,
         count_only=count_only,
         sample_complete=sample_complete,
         more_available=more_available,
+        scan_limit_hit=scan_limit_hit
+        or (
+            overview_total is not None
+            and overview_total > observed_total
+            and observed_total >= scan_lim
+        ),
     )
     return ctx._helper_success(
         start_calls=start_calls, source=endpoint, items=items, meta=meta

monty_api/helpers/repos.py CHANGED Viewed

@@ -7,7 +7,6 @@ from ..context_types import HelperRuntimeContext
 from ..helper_contracts import repo_expand_alias_map
 from ..constants import (
     ACTOR_CANONICAL_FIELDS,
-    DAILY_PAPER_CANONICAL_FIELDS,
     EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
     LIKES_ENRICHMENT_MAX_REPOS,
     LIKES_RANKING_WINDOW_DEFAULT,
@@ -1287,62 +1286,6 @@ async def hf_trending(
     )
-async def hf_daily_papers(
-    ctx: HelperRuntimeContext,
-    limit: int = 20,
-    where: dict[str, Any] | None = None,
-    fields: list[str] | None = None,
-) -> dict[str, Any]:
-    start_calls = ctx.call_count["n"]
-    default_limit = ctx._policy_int("hf_daily_papers", "default_limit", 20)
-    max_limit = ctx._policy_int(
-        "hf_daily_papers", "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
-    )
-    lim = ctx._clamp_int(limit, default=default_limit, minimum=1, maximum=max_limit)
-    resp = ctx._host_raw_call("/api/daily_papers", params={"limit": lim})
-    if not resp.get("ok"):
-        return ctx._helper_error(
-            start_calls=start_calls,
-            source="/api/daily_papers",
-            error=resp.get("error") or "daily papers fetch failed",
-        )
-    payload = resp.get("data") if isinstance(resp.get("data"), list) else []
-    items: list[dict[str, Any]] = []
-    for idx, row in enumerate(payload[:lim], start=1):
-        if not isinstance(row, dict):
-            continue
-        items.append(ctx._normalize_daily_paper_row(row, rank=idx))
-    try:
-        items = ctx._apply_where(
-            items, where, allowed_fields=DAILY_PAPER_CANONICAL_FIELDS
-        )
-    except ValueError as exc:
-        return ctx._helper_error(
-            start_calls=start_calls,
-            source="/api/daily_papers",
-            error=exc,
-        )
-    matched = len(items)
-    try:
-        items = ctx._project_daily_paper_items(items[:lim], fields)
-    except ValueError as exc:
-        return ctx._helper_error(
-            start_calls=start_calls,
-            source="/api/daily_papers",
-            error=exc,
-        )
-    return ctx._helper_success(
-        start_calls=start_calls,
-        source="/api/daily_papers",
-        items=items,
-        limit=lim,
-        scanned=len(payload),
-        matched=matched,
-        returned=len(items),
-        ordered_ranking=True,
-    )
 def register_repo_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[..., Any]]:
     return {
         "hf_models_search": partial(hf_models_search, ctx),
@@ -1355,5 +1298,4 @@ def register_repo_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[...,
         "hf_repo_discussion_details": partial(hf_repo_discussion_details, ctx),
         "hf_repo_details": partial(hf_repo_details, ctx),
         "hf_trending": partial(hf_trending, ctx),
-        "hf_daily_papers": partial(hf_daily_papers, ctx),
     }

 from ..helper_contracts import repo_expand_alias_map
 from ..constants import (
     ACTOR_CANONICAL_FIELDS,
     EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
     LIKES_ENRICHMENT_MAX_REPOS,
     LIKES_RANKING_WINDOW_DEFAULT,
     )
 def register_repo_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[..., Any]]:
     return {
         "hf_models_search": partial(hf_models_search, ctx),
         "hf_repo_discussion_details": partial(hf_repo_discussion_details, ctx),
         "hf_repo_details": partial(hf_repo_details, ctx),
         "hf_trending": partial(hf_trending, ctx),
     }

monty_api/http_runtime.py CHANGED Viewed

@@ -429,47 +429,6 @@ def _normalize_trending_row(
     return row
-def _normalize_daily_paper_row(
-    row: dict[str, Any], rank: int | None = None
-) -> dict[str, Any]:
-    paper = row.get("paper") if isinstance(row.get("paper"), dict) else {}
-    org = (
-        row.get("organization")
-        if isinstance(row.get("organization"), dict)
-        else paper.get("organization")
-    )
-    organization = None
-    if isinstance(org, dict):
-        organization = org.get("name") or org.get("fullname")
-    item = {
-        "paper_id": paper.get("id"),
-        "title": row.get("title") or paper.get("title"),
-        "summary": row.get("summary")
-        or paper.get("summary")
-        or paper.get("ai_summary"),
-        "published_at": row.get("publishedAt") or paper.get("publishedAt"),
-        "submitted_on_daily_at": paper.get("submittedOnDailyAt"),
-        "authors": _extract_author_names(paper.get("authors")),
-        "organization": organization,
-        "submitted_by": _extract_profile_name(
-            row.get("submittedBy") or paper.get("submittedOnDailyBy")
-        ),
-        "discussion_id": paper.get("discussionId"),
-        "upvotes": _as_int(paper.get("upvotes")),
-        "github_repo_url": paper.get("githubRepo"),
-        "github_stars": _as_int(paper.get("githubStars")),
-        "project_page_url": paper.get("projectPage"),
-        "num_comments": _as_int(row.get("numComments")),
-        "is_author_participating": row.get("isAuthorParticipating")
-        if isinstance(row.get("isAuthorParticipating"), bool)
-        else None,
-        "repo_id": row.get("repo_id") or paper.get("repo_id"),
-        "rank": rank,
-    }
-    return item
 def _normalize_collection_repo_item(row: dict[str, Any]) -> dict[str, Any] | None:
     repo_id = row.get("id") or row.get("repoId") or row.get("repo_id")
     if not isinstance(repo_id, str) or not repo_id:

     return row
 def _normalize_collection_repo_item(row: dict[str, Any]) -> dict[str, Any] | None:
     repo_id = row.get("id") or row.get("repoId") or row.get("repo_id")
     if not isinstance(repo_id, str) or not repo_id: