Spaces:
Running
Running
Deploy hf-hub-query with current fast-agent and Monty
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff.
- Dockerfile +2 -2
- _monty_codegen_shared.md +100 -131
- hf-hub-query.md +3 -1
- monty_api/__pycache__/__init__.cpython-313.pyc +0 -0
- monty_api/__pycache__/__init__.cpython-314.pyc +0 -0
- monty_api/__pycache__/aliases.cpython-313.pyc +0 -0
- monty_api/__pycache__/aliases.cpython-314.pyc +0 -0
- monty_api/__pycache__/constants.cpython-313.pyc +0 -0
- monty_api/__pycache__/constants.cpython-314.pyc +0 -0
- monty_api/__pycache__/context_types.cpython-313.pyc +0 -0
- monty_api/__pycache__/context_types.cpython-314.pyc +0 -0
- monty_api/__pycache__/helper_contracts.cpython-313.pyc +0 -0
- monty_api/__pycache__/helper_contracts.cpython-314.pyc +0 -0
- monty_api/__pycache__/http_runtime.cpython-313.pyc +0 -0
- monty_api/__pycache__/http_runtime.cpython-314.pyc +0 -0
- monty_api/__pycache__/llm_time_hook.cpython-314.pyc +0 -0
- monty_api/__pycache__/query_entrypoints.cpython-313.pyc +0 -0
- monty_api/__pycache__/query_entrypoints.cpython-314.pyc +0 -0
- monty_api/__pycache__/registry.cpython-313.pyc +0 -0
- monty_api/__pycache__/registry.cpython-314.pyc +0 -0
- monty_api/__pycache__/runtime_context.cpython-313.pyc +0 -0
- monty_api/__pycache__/runtime_context.cpython-314.pyc +0 -0
- monty_api/__pycache__/runtime_envelopes.cpython-313.pyc +0 -0
- monty_api/__pycache__/runtime_envelopes.cpython-314.pyc +0 -0
- monty_api/__pycache__/runtime_filtering.cpython-313.pyc +0 -0
- monty_api/__pycache__/runtime_filtering.cpython-314.pyc +0 -0
- monty_api/__pycache__/tool_entrypoints.cpython-313.pyc +0 -0
- monty_api/__pycache__/tool_entrypoints.cpython-314.pyc +0 -0
- monty_api/__pycache__/validation.cpython-313.pyc +0 -0
- monty_api/__pycache__/validation.cpython-314.pyc +0 -0
- monty_api/constants.py +7 -9
- monty_api/helper_contracts.py +5 -32
- monty_api/helpers/__init__.py +0 -2
- monty_api/helpers/__pycache__/__init__.cpython-313.pyc +0 -0
- monty_api/helpers/__pycache__/__init__.cpython-314.pyc +0 -0
- monty_api/helpers/__pycache__/activity.cpython-313.pyc +0 -0
- monty_api/helpers/__pycache__/activity.cpython-314.pyc +0 -0
- monty_api/helpers/__pycache__/collections.cpython-313.pyc +0 -0
- monty_api/helpers/__pycache__/collections.cpython-314.pyc +0 -0
- monty_api/helpers/__pycache__/common.cpython-313.pyc +0 -0
- monty_api/helpers/__pycache__/common.cpython-314.pyc +0 -0
- monty_api/helpers/__pycache__/introspection.cpython-313.pyc +0 -0
- monty_api/helpers/__pycache__/introspection.cpython-314.pyc +0 -0
- monty_api/helpers/__pycache__/profiles.cpython-313.pyc +0 -0
- monty_api/helpers/__pycache__/profiles.cpython-314.pyc +0 -0
- monty_api/helpers/__pycache__/repos.cpython-313.pyc +0 -0
- monty_api/helpers/__pycache__/repos.cpython-314.pyc +0 -0
- monty_api/helpers/introspection.py +2 -4
- monty_api/helpers/profiles.py +8 -18
- monty_api/helpers/repos.py +68 -5
Dockerfile
CHANGED
|
@@ -13,9 +13,9 @@ WORKDIR /app
|
|
| 13 |
|
| 14 |
COPY wheels /tmp/wheels
|
| 15 |
RUN uv pip install --system --no-cache \
|
| 16 |
-
"fast-agent-mcp
|
| 17 |
huggingface_hub \
|
| 18 |
-
"pydantic-monty==0.0.
|
| 19 |
|
| 20 |
COPY --link ./ /app
|
| 21 |
RUN chown -R 1000:1000 /app
|
|
|
|
| 13 |
|
| 14 |
COPY wheels /tmp/wheels
|
| 15 |
RUN uv pip install --system --no-cache \
|
| 16 |
+
"fast-agent-mcp==0.6.24" \
|
| 17 |
huggingface_hub \
|
| 18 |
+
"pydantic-monty==0.0.17"
|
| 19 |
|
| 20 |
COPY --link ./ /app
|
| 21 |
RUN chown -R 1000:1000 /app
|
_monty_codegen_shared.md
CHANGED
|
@@ -50,11 +50,8 @@ result
|
|
| 50 |
- For human-facing follower/member/liker lists without an explicit requested count, prefer `limit=100` and return coverage when more may exist.
|
| 51 |
- For follower/following/member/liker queries that require local filtering on actor fields such as `username` or `fullname`, prefer a bounded scan like `limit=100` / `scan_limit=100` by default, or at most about `200` when a slightly broader sample is justified. Do **not** jump to `1000` unless the user explicitly asked for exhaustive coverage or a very large sample.
|
| 52 |
- Unknown `fields` / `where` keys now fail fast. Use only canonical field names.
|
| 53 |
-
- Ownership phrasing like "what collections does Qwen have", "collections by Qwen", or "collections owned by Qwen" means an owner lookup, so use `hf_collections_search(owner="Qwen")`, not a keyword-only `query="Qwen"` search.
|
| 54 |
-
- `hf_collections_search(owner=...)` filters owners case-insensitively, so preserve the user-provided owner spelling but use the owner argument directly.
|
| 55 |
- Ownership phrasing like "what spaces does X have", "what models does X have", or "what datasets does X have" means an author/owner inventory lookup, so use `hf_spaces_search(author="X")`, `hf_models_search(author="X")`, or `hf_datasets_search(author="X")` rather than a global keyword-only search.
|
| 56 |
-
- For paper discovery, use `hf_papers_search(...)` for search, `hf_daily_papers(...)` for the curated daily feed, `hf_paper_info(...)` for exact metadata, and `hf_read_paper(...)` for markdown content.
|
| 57 |
-
- The main Hub-native join points on paper rows are `organization`, `submitted_by`, and `author_usernames`. Papers do not expose first-class model/dataset/space repo IDs.
|
| 58 |
- For profile/detail/social questions about a user or org — bio, description, display name, website, GitHub, Twitter/X, LinkedIn, Bluesky, organizations, or pro status — use `hf_profile_summary(...)` first.
|
| 59 |
- For join-style questions that need profile details for followers, following, members, likers, or other actor lists, first fetch a **bounded** actor list, filter locally on actor fields like `username` / `fullname`, then hydrate only the bounded matches with `hf_profile_summary(...)`.
|
| 60 |
- Do **not** set the initial actor-list limit equal to the whole remaining call budget when each match needs a follow-up profile lookup; reserve budget for the profile-detail calls and return coverage if the hydration step is partial.
|
|
@@ -63,45 +60,13 @@ result
|
|
| 63 |
- Think like `huggingface_hub`: `search`, `filter`, `author`, repo-type-specific upstream params, then `fields`.
|
| 64 |
- Push constraints upstream whenever a first-class helper argument exists.
|
| 65 |
- `post_filter` is only for normalized row filters that cannot be pushed upstream.
|
|
|
|
| 66 |
- For created/updated date constraints, pair local `post_filter` with the matching sort (`created_at` or `last_modified`). Do **not** rely on date-only `post_filter` over an unsorted repo search window.
|
| 67 |
- Keep `post_filter` simple:
|
| 68 |
- exact match or `in` for returned fields like `runtime_stage`
|
| 69 |
-
- `gte` / `lte` for normalized numeric fields like `
|
| 70 |
- `gte` / `lte` also work for normalized ISO timestamp fields like `created_at` and `last_modified`
|
| 71 |
-
- `
|
| 72 |
-
- Do **not** use `post_filter` for things that already have first-class upstream params like `author`, `pipeline_tag`, `dataset_name`, `language`, `models`, or `datasets`.
|
| 73 |
-
|
| 74 |
-
## Common repo fields
|
| 75 |
-
|
| 76 |
-
- `repo_id`
|
| 77 |
-
- `repo_type`
|
| 78 |
-
- `author`
|
| 79 |
-
- `likes`
|
| 80 |
-
- `downloads`
|
| 81 |
-
- `created_at`
|
| 82 |
-
- `last_modified`
|
| 83 |
-
- `num_params`
|
| 84 |
-
- `repo_url`
|
| 85 |
-
- model: `library_name`, `pipeline_tag`
|
| 86 |
-
- dataset: `description`, `paperswithcode_id`
|
| 87 |
-
- space: `sdk`, `models`, `datasets`, `subdomain`
|
| 88 |
-
|
| 89 |
-
## Common collection fields
|
| 90 |
-
|
| 91 |
-
- `collection_id`
|
| 92 |
-
- `title`
|
| 93 |
-
- `owner`
|
| 94 |
-
- `description`
|
| 95 |
-
- `last_updated`
|
| 96 |
-
- `item_count`
|
| 97 |
-
- use `hf_collections_search(owner="<org-or-user>", ...)` for owner lookups
|
| 98 |
-
|
| 99 |
-
## Common paper join points
|
| 100 |
-
|
| 101 |
-
- `organization`
|
| 102 |
-
- `submitted_by`
|
| 103 |
-
- `author_usernames`
|
| 104 |
-
- `discussion_id`
|
| 105 |
|
| 106 |
Examples:
|
| 107 |
|
|
@@ -113,9 +78,9 @@ result
|
|
| 113 |
```py
|
| 114 |
result = await hf_models_search(
|
| 115 |
pipeline_tag="text-generation",
|
|
|
|
| 116 |
sort="trending_score",
|
| 117 |
limit=50,
|
| 118 |
-
post_filter={"num_params": {"gte": 20_000_000_000, "lte": 80_000_000_000}},
|
| 119 |
)
|
| 120 |
result
|
| 121 |
```
|
|
@@ -170,7 +135,7 @@ else:
|
|
| 170 |
result
|
| 171 |
```
|
| 172 |
|
| 173 |
-
|
| 174 |
|
| 175 |
```py
|
| 176 |
followers_resp = await hf_user_graph(
|
|
@@ -217,10 +182,81 @@ result = {
|
|
| 217 |
result
|
| 218 |
```
|
| 219 |
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
## Navigation graph
|
| 226 |
|
|
@@ -232,10 +268,7 @@ Use the helper that matches the question type.
|
|
| 232 |
- space search/list/discovery → `hf_spaces_search(...)`
|
| 233 |
- cross-type repo search → `hf_repo_search(...)`
|
| 234 |
- trending repos → `hf_trending(...)`
|
| 235 |
-
-
|
| 236 |
-
- paper search → `hf_papers_search(...)`
|
| 237 |
-
- paper detail → `hf_paper_info(...)`
|
| 238 |
-
- paper markdown → `hf_read_paper(...)`
|
| 239 |
- repo discussions → `hf_repo_discussions(...)`
|
| 240 |
- specific discussion details → `hf_repo_discussion_details(...)`
|
| 241 |
- users who liked one repo → `hf_repo_likers(...)`
|
|
@@ -290,22 +323,16 @@ await hf_collection_items(collection_id: 'str', repo_types: 'list[str] | None' =
|
|
| 290 |
|
| 291 |
await hf_collections_search(query: 'str | None' = None, owner: 'str | None' = None, limit: 'int' = 20, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 292 |
|
| 293 |
-
await hf_daily_papers(
|
| 294 |
|
| 295 |
-
await hf_datasets_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, benchmark: 'str | bool | None' = None, dataset_name: 'str | None' = None, gated: 'bool | None' = None, language_creators: 'str | list[str] | None' = None, language: 'str | list[str] | None' = None, multilinguality: 'str | list[str] | None' = None, size_categories: 'str | list[str] | None' = None, task_categories: 'str | list[str] | None' = None, task_ids: 'str | list[str] | None' = None, sort: 'str | None' = None, limit: 'int' =
|
| 296 |
|
| 297 |
-
await hf_models_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, apps: 'str | list[str] | None' = None, gated: 'bool | None' = None, inference: 'str | None' = None, inference_provider: 'str | list[str] | None' = None, model_name: 'str | None' = None, trained_dataset: 'str | list[str] | None' = None, pipeline_tag: 'str | None' = None, emissions_thresholds: 'tuple[float, float] | None' = None, sort: 'str | None' = None, limit: 'int' =
|
| 298 |
|
| 299 |
await hf_org_members(organization: 'str', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 300 |
|
| 301 |
-
await hf_paper_info(paper_id: 'str', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 302 |
-
|
| 303 |
-
await hf_papers_search(query: 'str', limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 304 |
-
|
| 305 |
await hf_profile_summary(handle: 'str | None' = None, include: 'list[str] | None' = None, likes_limit: 'int' = 10, activity_limit: 'int' = 10) -> 'dict[str, Any]'
|
| 306 |
|
| 307 |
-
await hf_read_paper(paper_id: 'str') -> 'dict[str, Any]'
|
| 308 |
-
|
| 309 |
await hf_recent_activity(feed_type: 'str | None' = None, entity: 'str | None' = None, activity_types: 'list[str] | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, max_pages: 'int | None' = None, start_cursor: 'str | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 310 |
|
| 311 |
await hf_repo_details(repo_id: 'str | None' = None, repo_ids: 'list[str] | None' = None, repo_type: 'str' = 'auto', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
|
@@ -316,11 +343,11 @@ await hf_repo_discussions(repo_type: 'str', repo_id: 'str', limit: 'int' = 20, f
|
|
| 316 |
|
| 317 |
await hf_repo_likers(repo_id: 'str', repo_type: 'str', limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 318 |
|
| 319 |
-
await hf_repo_search(search: 'str | None' = None, repo_type: 'str | None' = None, repo_types: 'list[str] | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, sort: 'str | None' = None, limit: 'int' =
|
| 320 |
|
| 321 |
await hf_runtime_capabilities(section: 'str | None' = None) -> 'dict[str, Any]'
|
| 322 |
|
| 323 |
-
await hf_spaces_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, datasets: 'str | list[str] | None' = None, models: 'str | list[str] | None' = None, linked: 'bool' = False, sort: 'str | None' = None, limit: 'int' =
|
| 324 |
|
| 325 |
await hf_trending(repo_type: 'str' = 'model', limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 326 |
|
|
@@ -387,27 +414,24 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
|
|
| 387 |
### hf_daily_papers
|
| 388 |
|
| 389 |
- category: `curated_feed`
|
| 390 |
-
- backed_by: `HfApi.list_daily_papers`
|
| 391 |
- returns:
|
| 392 |
- envelope: `{ok, item, items, meta, error}`
|
| 393 |
-
- row_type: `
|
| 394 |
-
- default_fields: `paper_id`, `title`, `summary`, `published_at`, `
|
| 395 |
-
- guaranteed_fields: `paper_id`, `title`, `published_at`
|
| 396 |
-
- optional_fields: `summary`, `
|
| 397 |
-
- supported_params: `
|
| 398 |
-
- param_values:
|
| 399 |
-
- sort: `published_at`, `trending`
|
| 400 |
- fields_contract:
|
| 401 |
-
- allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `
|
| 402 |
- canonical_only: `true`
|
| 403 |
- where_contract:
|
| 404 |
-
- allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `
|
| 405 |
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 406 |
- normalized_only: `true`
|
| 407 |
- limit_contract:
|
| 408 |
- default_limit: `20`
|
| 409 |
- max_limit: `500`
|
| 410 |
-
- notes:
|
| 411 |
|
| 412 |
### hf_datasets_search
|
| 413 |
|
|
@@ -430,7 +454,7 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
|
|
| 430 |
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 431 |
- normalized_only: `true`
|
| 432 |
- limit_contract:
|
| 433 |
-
- default_limit: `
|
| 434 |
- max_limit: `5000`
|
| 435 |
- notes: Thin dataset-search wrapper around the Hub list_datasets path. Prefer this over hf_repo_search for dataset-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 436 |
|
|
@@ -444,7 +468,7 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
|
|
| 444 |
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 445 |
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 446 |
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 447 |
-
- supported_params: `search`, `filter`, `author`, `apps`, `gated`, `inference`, `inference_provider`, `model_name`, `trained_dataset`, `pipeline_tag`, `emissions_thresholds`, `sort`, `limit`, `expand`, `full`, `card_data`, `fetch_config`, `fields`, `post_filter`
|
| 448 |
- sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 449 |
- expand_values: `author`, `base_models`, `card_data`, `config`, `created_at`, `disabled`, `downloads`, `downloads_all_time`, `eval_results`, `gated`, `gguf`, `inference`, `inference_provider_mapping`, `last_modified`, `library_name`, `likes`, `mask_token`, `model_index`, `pipeline_tag`, `private`, `resource_group`, `safetensors`, `sha`, `siblings`, `spaces`, `tags`, `transformers_info`, `trending_score`, `widget_data`, `xet_enabled`, `gitaly_uid`
|
| 450 |
- fields_contract:
|
|
@@ -455,7 +479,7 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
|
|
| 455 |
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 456 |
- normalized_only: `true`
|
| 457 |
- limit_contract:
|
| 458 |
-
- default_limit: `
|
| 459 |
- max_limit: `5000`
|
| 460 |
- notes: Thin model-search wrapper around the Hub list_models path. Prefer this over hf_repo_search for model-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 461 |
|
|
@@ -482,45 +506,6 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
|
|
| 482 |
- scan_max: `10000`
|
| 483 |
- notes: Returns organization member summary rows.
|
| 484 |
|
| 485 |
-
### hf_paper_info
|
| 486 |
-
|
| 487 |
-
- category: `paper_detail`
|
| 488 |
-
- backed_by: `HfApi.paper_info`
|
| 489 |
-
- returns:
|
| 490 |
-
- envelope: `{ok, item, items, meta, error}`
|
| 491 |
-
- row_type: `paper`
|
| 492 |
-
- default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
|
| 493 |
-
- guaranteed_fields: `paper_id`, `title`, `published_at`
|
| 494 |
-
- optional_fields: `summary`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
|
| 495 |
-
- supported_params: `paper_id`, `fields`
|
| 496 |
-
- fields_contract:
|
| 497 |
-
- allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
|
| 498 |
-
- canonical_only: `true`
|
| 499 |
-
- notes: Exact paper metadata helper backed by HfApi.paper_info.
|
| 500 |
-
|
| 501 |
-
### hf_papers_search
|
| 502 |
-
|
| 503 |
-
- category: `paper_search`
|
| 504 |
-
- backed_by: `HfApi.list_papers`
|
| 505 |
-
- returns:
|
| 506 |
-
- envelope: `{ok, item, items, meta, error}`
|
| 507 |
-
- row_type: `paper`
|
| 508 |
-
- default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
|
| 509 |
-
- guaranteed_fields: `paper_id`, `title`, `published_at`
|
| 510 |
-
- optional_fields: `summary`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
|
| 511 |
-
- supported_params: `query`, `limit`, `where`, `fields`
|
| 512 |
-
- fields_contract:
|
| 513 |
-
- allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
|
| 514 |
-
- canonical_only: `true`
|
| 515 |
-
- where_contract:
|
| 516 |
-
- allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_at`, `authors`, `author_usernames`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `source`, `comments`, `project_page`, `github_repo`, `github_stars`, `rank`
|
| 517 |
-
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 518 |
-
- normalized_only: `true`
|
| 519 |
-
- limit_contract:
|
| 520 |
-
- default_limit: `20`
|
| 521 |
-
- max_limit: `500`
|
| 522 |
-
- notes: Paper search helper backed by HfApi.list_papers. Use organization, submitted_by, and author_usernames as the main Hub-native join points.
|
| 523 |
-
|
| 524 |
### hf_profile_summary
|
| 525 |
|
| 526 |
- category: `profile_summary`
|
|
@@ -535,22 +520,6 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
|
|
| 535 |
- include: `likes`, `activity`
|
| 536 |
- notes: Profile summary helper. Aggregate counts like followers_count/following_count are in the base item. include=['likes', 'activity'] adds composed samples and extra upstream work; no other include values are supported. Overview-owned repo counts may differ slightly from visible public search/list results.
|
| 537 |
|
| 538 |
-
### hf_read_paper
|
| 539 |
-
|
| 540 |
-
- category: `paper_markdown`
|
| 541 |
-
- backed_by: `HfApi.read_paper`
|
| 542 |
-
- returns:
|
| 543 |
-
- envelope: `{ok, item, items, meta, error}`
|
| 544 |
-
- row_type: `paper_content`
|
| 545 |
-
- default_fields: `paper_id`, `content`
|
| 546 |
-
- guaranteed_fields: `paper_id`, `content`
|
| 547 |
-
- optional_fields: []
|
| 548 |
-
- supported_params: `paper_id`
|
| 549 |
-
- fields_contract:
|
| 550 |
-
- allowed_fields: `paper_id`, `content`
|
| 551 |
-
- canonical_only: `true`
|
| 552 |
-
- notes: Returns paper markdown content backed by HfApi.read_paper.
|
| 553 |
-
|
| 554 |
### hf_recent_activity
|
| 555 |
|
| 556 |
- category: `activity_feed`
|
|
@@ -681,7 +650,7 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
|
|
| 681 |
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 682 |
- normalized_only: `true`
|
| 683 |
- limit_contract:
|
| 684 |
-
- default_limit: `
|
| 685 |
- max_limit: `5000`
|
| 686 |
- notes: Small generic repo-search helper. Prefer hf_models_search, hf_datasets_search, or hf_spaces_search for single-type queries; use hf_repo_search for intentionally cross-type search. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 687 |
|
|
@@ -720,7 +689,7 @@ All helpers return the same envelope: `{ok, item, items, meta, error}`.
|
|
| 720 |
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 721 |
- normalized_only: `true`
|
| 722 |
- limit_contract:
|
| 723 |
-
- default_limit: `
|
| 724 |
- max_limit: `5000`
|
| 725 |
- notes: Thin space-search wrapper around the Hub list_spaces path. Prefer this over hf_repo_search for space-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 726 |
|
|
|
|
| 50 |
- For human-facing follower/member/liker lists without an explicit requested count, prefer `limit=100` and return coverage when more may exist.
|
| 51 |
- For follower/following/member/liker queries that require local filtering on actor fields such as `username` or `fullname`, prefer a bounded scan like `limit=100` / `scan_limit=100` by default, or at most about `200` when a slightly broader sample is justified. Do **not** jump to `1000` unless the user explicitly asked for exhaustive coverage or a very large sample.
|
| 52 |
- Unknown `fields` / `where` keys now fail fast. Use only canonical field names.
|
| 53 |
+
- Ownership phrasing like "what collections does Qwen have", "collections by Qwen", or "collections owned by Qwen" means an owner lookup, so use `hf_collections_search(owner="Qwen")`, not a keyword-only `query="Qwen"` search. The `owner` argument is matched case-insensitively.
|
|
|
|
| 54 |
- Ownership phrasing like "what spaces does X have", "what models does X have", or "what datasets does X have" means an author/owner inventory lookup, so use `hf_spaces_search(author="X")`, `hf_models_search(author="X")`, or `hf_datasets_search(author="X")` rather than a global keyword-only search.
|
|
|
|
|
|
|
| 55 |
- For profile/detail/social questions about a user or org — bio, description, display name, website, GitHub, Twitter/X, LinkedIn, Bluesky, organizations, or pro status — use `hf_profile_summary(...)` first.
|
| 56 |
- For join-style questions that need profile details for followers, following, members, likers, or other actor lists, first fetch a **bounded** actor list, filter locally on actor fields like `username` / `fullname`, then hydrate only the bounded matches with `hf_profile_summary(...)`.
|
| 57 |
- Do **not** set the initial actor-list limit equal to the whole remaining call budget when each match needs a follow-up profile lookup; reserve budget for the profile-detail calls and return coverage if the hydration step is partial.
|
|
|
|
| 60 |
- Think like `huggingface_hub`: `search`, `filter`, `author`, repo-type-specific upstream params, then `fields`.
|
| 61 |
- Push constraints upstream whenever a first-class helper argument exists.
|
| 62 |
- `post_filter` is only for normalized row filters that cannot be pushed upstream.
|
| 63 |
+
- `num_params` is a first-class upstream model-search arg; use `num_params="min:6B,max:128B"` instead of `post_filter` when possible.
|
| 64 |
- For created/updated date constraints, pair local `post_filter` with the matching sort (`created_at` or `last_modified`). Do **not** rely on date-only `post_filter` over an unsorted repo search window.
|
| 65 |
- Keep `post_filter` simple:
|
| 66 |
- exact match or `in` for returned fields like `runtime_stage`
|
| 67 |
+
- `gte` / `lte` for normalized numeric fields like `downloads` and `likes`
|
| 68 |
- `gte` / `lte` also work for normalized ISO timestamp fields like `created_at` and `last_modified`
|
| 69 |
+
- Do **not** use `post_filter` for things that already have first-class upstream params, such as `author`, `pipeline_tag`, `num_params` (on model search), `dataset_name`, `language`, `models`, or `datasets`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
Examples:
|
| 72 |
|
|
|
|
| 78 |
```py
|
| 79 |
result = await hf_models_search(
|
| 80 |
pipeline_tag="text-generation",
|
| 81 |
+
num_params="min:20B,max:80B",
|
| 82 |
sort="trending_score",
|
| 83 |
limit=50,
|
|
|
|
| 84 |
)
|
| 85 |
result
|
| 86 |
```
|
|
|
|
| 135 |
result
|
| 136 |
```
|
| 137 |
|
| 138 |
+
Follower-profile join pattern:
|
| 139 |
|
| 140 |
```py
|
| 141 |
followers_resp = await hf_user_graph(
|
|
|
|
| 182 |
result
|
| 183 |
```
|
| 184 |
|
| 185 |
+
Follower-likes aggregation pattern:
|
| 186 |
+
|
| 187 |
+
```py
|
| 188 |
+
followers_resp = await hf_user_graph(relation="followers", limit=100, fields=["username"])
|
| 189 |
+
followers = followers_resp.get("items") or []
|
| 190 |
+
results = []
|
| 191 |
+
for follower in followers:
|
| 192 |
+
username = follower.get("username")
|
| 193 |
+
if not username:
|
| 194 |
+
continue
|
| 195 |
+
likes_resp = await hf_user_likes(
|
| 196 |
+
username=username,
|
| 197 |
+
repo_types=["model"],
|
| 198 |
+
limit=20,
|
| 199 |
+
fields=["repo_id", "liked_at"],
|
| 200 |
+
)
|
| 201 |
+
results.append(
|
| 202 |
+
{
|
| 203 |
+
"follower": username,
|
| 204 |
+
"liked_models": likes_resp.get("items") or [],
|
| 205 |
+
}
|
| 206 |
+
)
|
| 207 |
+
coverage = {
|
| 208 |
+
"followers": followers_resp.get("meta") or {},
|
| 209 |
+
}
|
| 210 |
+
result = {"results": results, "coverage": coverage}
|
| 211 |
+
result
|
| 212 |
+
```
|
| 213 |
+
|
| 214 |
+
Current-user pro-follower model-likes pattern:
|
| 215 |
+
|
| 216 |
+
```py
|
| 217 |
+
followers_resp = await hf_user_graph(
|
| 218 |
+
relation="followers",
|
| 219 |
+
pro_only=True,
|
| 220 |
+
limit=100,
|
| 221 |
+
fields=["username"],
|
| 222 |
+
)
|
| 223 |
+
followers = followers_resp.get("items") or []
|
| 224 |
+
remaining_calls = max(0, max_calls - 1)
|
| 225 |
+
results = {}
|
| 226 |
+
partial = (
|
| 227 |
+
(followers_resp.get("meta") or {}).get("limit_boundary_hit")
|
| 228 |
+
or (followers_resp.get("meta") or {}).get("more_available") not in {False, None}
|
| 229 |
+
)
|
| 230 |
+
processed_followers = 0
|
| 231 |
+
for follower in followers:
|
| 232 |
+
if remaining_calls <= 0:
|
| 233 |
+
partial = True
|
| 234 |
+
break
|
| 235 |
+
username = follower.get("username")
|
| 236 |
+
if not username:
|
| 237 |
+
continue
|
| 238 |
+
likes_resp = await hf_user_likes(
|
| 239 |
+
username=username,
|
| 240 |
+
repo_types=["model"],
|
| 241 |
+
limit=2,
|
| 242 |
+
fields=["repo_id", "repo_author", "liked_at"],
|
| 243 |
+
)
|
| 244 |
+
remaining_calls -= 1
|
| 245 |
+
likes_meta = likes_resp.get("meta") or {}
|
| 246 |
+
if likes_meta.get("limit_boundary_hit") or likes_meta.get("more_available") not in {False, None}:
|
| 247 |
+
partial = True
|
| 248 |
+
items = likes_resp.get("items") or []
|
| 249 |
+
if items:
|
| 250 |
+
results[username] = items
|
| 251 |
+
processed_followers += 1
|
| 252 |
+
coverage = {
|
| 253 |
+
"followers": followers_resp.get("meta") or {},
|
| 254 |
+
"processed_followers": processed_followers,
|
| 255 |
+
"partial": partial,
|
| 256 |
+
}
|
| 257 |
+
result = {"results": results, "coverage": coverage}
|
| 258 |
+
result
|
| 259 |
+
```
|
| 260 |
|
| 261 |
## Navigation graph
|
| 262 |
|
|
|
|
| 268 |
- space search/list/discovery → `hf_spaces_search(...)`
|
| 269 |
- cross-type repo search → `hf_repo_search(...)`
|
| 270 |
- trending repos → `hf_trending(...)`
|
| 271 |
+
- daily papers → `hf_daily_papers(...)`
|
|
|
|
|
|
|
|
|
|
| 272 |
- repo discussions → `hf_repo_discussions(...)`
|
| 273 |
- specific discussion details → `hf_repo_discussion_details(...)`
|
| 274 |
- users who liked one repo → `hf_repo_likers(...)`
|
|
|
|
| 323 |
|
| 324 |
await hf_collections_search(query: 'str | None' = None, owner: 'str | None' = None, limit: 'int' = 20, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 325 |
|
| 326 |
+
await hf_daily_papers(limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 327 |
|
| 328 |
+
await hf_datasets_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, benchmark: 'str | bool | None' = None, dataset_name: 'str | None' = None, gated: 'bool | None' = None, language_creators: 'str | list[str] | None' = None, language: 'str | list[str] | None' = None, multilinguality: 'str | list[str] | None' = None, size_categories: 'str | list[str] | None' = None, task_categories: 'str | list[str] | None' = None, task_ids: 'str | list[str] | None' = None, sort: 'str | None' = None, limit: 'int' = 100, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 329 |
|
| 330 |
+
await hf_models_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, apps: 'str | list[str] | None' = None, gated: 'bool | None' = None, inference: 'str | None' = None, inference_provider: 'str | list[str] | None' = None, model_name: 'str | None' = None, trained_dataset: 'str | list[str] | None' = None, pipeline_tag: 'str | None' = None, num_params: 'str | None' = None, emissions_thresholds: 'tuple[float, float] | None' = None, sort: 'str | None' = None, limit: 'int' = 100, expand: 'list[str] | None' = None, full: 'bool | None' = None, card_data: 'bool' = False, fetch_config: 'bool' = False, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 331 |
|
| 332 |
await hf_org_members(organization: 'str', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 333 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
await hf_profile_summary(handle: 'str | None' = None, include: 'list[str] | None' = None, likes_limit: 'int' = 10, activity_limit: 'int' = 10) -> 'dict[str, Any]'
|
| 335 |
|
|
|
|
|
|
|
| 336 |
await hf_recent_activity(feed_type: 'str | None' = None, entity: 'str | None' = None, activity_types: 'list[str] | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, max_pages: 'int | None' = None, start_cursor: 'str | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 337 |
|
| 338 |
await hf_repo_details(repo_id: 'str | None' = None, repo_ids: 'list[str] | None' = None, repo_type: 'str' = 'auto', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
|
|
|
| 343 |
|
| 344 |
await hf_repo_likers(repo_id: 'str', repo_type: 'str', limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 345 |
|
| 346 |
+
await hf_repo_search(search: 'str | None' = None, repo_type: 'str | None' = None, repo_types: 'list[str] | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, sort: 'str | None' = None, limit: 'int' = 100, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 347 |
|
| 348 |
await hf_runtime_capabilities(section: 'str | None' = None) -> 'dict[str, Any]'
|
| 349 |
|
| 350 |
+
await hf_spaces_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, datasets: 'str | list[str] | None' = None, models: 'str | list[str] | None' = None, linked: 'bool' = False, sort: 'str | None' = None, limit: 'int' = 100, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 351 |
|
| 352 |
await hf_trending(repo_type: 'str' = 'model', limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 353 |
|
|
|
|
| 414 |
### hf_daily_papers
|
| 415 |
|
| 416 |
- category: `curated_feed`
|
|
|
|
| 417 |
- returns:
|
| 418 |
- envelope: `{ok, item, items, meta, error}`
|
| 419 |
+
- row_type: `daily_paper`
|
| 420 |
+
- default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
|
| 421 |
+
- guaranteed_fields: `paper_id`, `title`, `published_at`, `rank`
|
| 422 |
+
- optional_fields: `summary`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`
|
| 423 |
+
- supported_params: `limit`, `where`, `fields`
|
|
|
|
|
|
|
| 424 |
- fields_contract:
|
| 425 |
+
- allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
|
| 426 |
- canonical_only: `true`
|
| 427 |
- where_contract:
|
| 428 |
+
- allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
|
| 429 |
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 430 |
- normalized_only: `true`
|
| 431 |
- limit_contract:
|
| 432 |
- default_limit: `20`
|
| 433 |
- max_limit: `500`
|
| 434 |
+
- notes: Returns daily paper summary rows. repo_id is omitted unless the upstream payload provides it.
|
| 435 |
|
| 436 |
### hf_datasets_search
|
| 437 |
|
|
|
|
| 454 |
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 455 |
- normalized_only: `true`
|
| 456 |
- limit_contract:
|
| 457 |
+
- default_limit: `100`
|
| 458 |
- max_limit: `5000`
|
| 459 |
- notes: Thin dataset-search wrapper around the Hub list_datasets path. Prefer this over hf_repo_search for dataset-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 460 |
|
|
|
|
| 468 |
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 469 |
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 470 |
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 471 |
+
- supported_params: `search`, `filter`, `author`, `apps`, `gated`, `inference`, `inference_provider`, `model_name`, `trained_dataset`, `pipeline_tag`, `num_params`, `emissions_thresholds`, `sort`, `limit`, `expand`, `full`, `card_data`, `fetch_config`, `fields`, `post_filter`
|
| 472 |
- sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 473 |
- expand_values: `author`, `base_models`, `card_data`, `config`, `created_at`, `disabled`, `downloads`, `downloads_all_time`, `eval_results`, `gated`, `gguf`, `inference`, `inference_provider_mapping`, `last_modified`, `library_name`, `likes`, `mask_token`, `model_index`, `pipeline_tag`, `private`, `resource_group`, `safetensors`, `sha`, `siblings`, `spaces`, `tags`, `transformers_info`, `trending_score`, `widget_data`, `xet_enabled`, `gitaly_uid`
|
| 474 |
- fields_contract:
|
|
|
|
| 479 |
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 480 |
- normalized_only: `true`
|
| 481 |
- limit_contract:
|
| 482 |
+
- default_limit: `100`
|
| 483 |
- max_limit: `5000`
|
| 484 |
- notes: Thin model-search wrapper around the Hub list_models path. Prefer this over hf_repo_search for model-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 485 |
|
|
|
|
| 506 |
- scan_max: `10000`
|
| 507 |
- notes: Returns organization member summary rows.
|
| 508 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
### hf_profile_summary
|
| 510 |
|
| 511 |
- category: `profile_summary`
|
|
|
|
| 520 |
- include: `likes`, `activity`
|
| 521 |
- notes: Profile summary helper. Aggregate counts like followers_count/following_count are in the base item. include=['likes', 'activity'] adds composed samples and extra upstream work; no other include values are supported. Overview-owned repo counts may differ slightly from visible public search/list results.
|
| 522 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
### hf_recent_activity
|
| 524 |
|
| 525 |
- category: `activity_feed`
|
|
|
|
| 650 |
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 651 |
- normalized_only: `true`
|
| 652 |
- limit_contract:
|
| 653 |
+
- default_limit: `100`
|
| 654 |
- max_limit: `5000`
|
| 655 |
- notes: Small generic repo-search helper. Prefer hf_models_search, hf_datasets_search, or hf_spaces_search for single-type queries; use hf_repo_search for intentionally cross-type search. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 656 |
|
|
|
|
| 689 |
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 690 |
- normalized_only: `true`
|
| 691 |
- limit_contract:
|
| 692 |
+
- default_limit: `100`
|
| 693 |
- max_limit: `5000`
|
| 694 |
- notes: Thin space-search wrapper around the Hub list_spaces path. Prefer this over hf_repo_search for space-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 695 |
|
hf-hub-query.md
CHANGED
|
@@ -1,12 +1,14 @@
|
|
| 1 |
---
|
| 2 |
type: agent
|
| 3 |
name: hf_hub_query
|
| 4 |
-
model: gpt-oss
|
| 5 |
use_history: false
|
| 6 |
default: true
|
| 7 |
description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound limit, scan_limit, max_pages, and ranking_window for brevity or broader coverage, and the tool can also be asked about its supported helpers, canonical fields, defaults, and coverage behavior."
|
| 8 |
shell: false
|
| 9 |
skills: []
|
|
|
|
|
|
|
| 10 |
function_tools:
|
| 11 |
- entrypoint: tool_entrypoints.py:hf_hub_query_raw
|
| 12 |
variant: code
|
|
|
|
| 1 |
---
|
| 2 |
type: agent
|
| 3 |
name: hf_hub_query
|
| 4 |
+
model: hf.openai/gpt-oss-120b:sambanova
|
| 5 |
use_history: false
|
| 6 |
default: true
|
| 7 |
description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound limit, scan_limit, max_pages, and ranking_window for brevity or broader coverage, and the tool can also be asked about its supported helpers, canonical fields, defaults, and coverage behavior."
|
| 8 |
shell: false
|
| 9 |
skills: []
|
| 10 |
+
#tool_hooks:
|
| 11 |
+
# after_llm_call: monty_api/llm_time_hook.py:display_llm_time
|
| 12 |
function_tools:
|
| 13 |
- entrypoint: tool_entrypoints.py:hf_hub_query_raw
|
| 14 |
variant: code
|
monty_api/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (741 Bytes). View file
|
|
|
monty_api/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (941 Bytes). View file
|
|
|
monty_api/__pycache__/aliases.cpython-313.pyc
ADDED
|
Binary file (901 Bytes). View file
|
|
|
monty_api/__pycache__/aliases.cpython-314.pyc
ADDED
|
Binary file (976 Bytes). View file
|
|
|
monty_api/__pycache__/constants.cpython-313.pyc
ADDED
|
Binary file (2.99 kB). View file
|
|
|
monty_api/__pycache__/constants.cpython-314.pyc
ADDED
|
Binary file (2.97 kB). View file
|
|
|
monty_api/__pycache__/context_types.cpython-313.pyc
ADDED
|
Binary file (1.34 kB). View file
|
|
|
monty_api/__pycache__/context_types.cpython-314.pyc
ADDED
|
Binary file (1.6 kB). View file
|
|
|
monty_api/__pycache__/helper_contracts.cpython-313.pyc
ADDED
|
Binary file (20.8 kB). View file
|
|
|
monty_api/__pycache__/helper_contracts.cpython-314.pyc
ADDED
|
Binary file (23.8 kB). View file
|
|
|
monty_api/__pycache__/http_runtime.cpython-313.pyc
ADDED
|
Binary file (28.5 kB). View file
|
|
|
monty_api/__pycache__/http_runtime.cpython-314.pyc
ADDED
|
Binary file (33.2 kB). View file
|
|
|
monty_api/__pycache__/llm_time_hook.cpython-314.pyc
ADDED
|
Binary file (2.94 kB). View file
|
|
|
monty_api/__pycache__/query_entrypoints.cpython-313.pyc
ADDED
|
Binary file (17.8 kB). View file
|
|
|
monty_api/__pycache__/query_entrypoints.cpython-314.pyc
ADDED
|
Binary file (20.5 kB). View file
|
|
|
monty_api/__pycache__/registry.cpython-313.pyc
ADDED
|
Binary file (14.6 kB). View file
|
|
|
monty_api/__pycache__/registry.cpython-314.pyc
ADDED
|
Binary file (15.7 kB). View file
|
|
|
monty_api/__pycache__/runtime_context.cpython-313.pyc
ADDED
|
Binary file (17.1 kB). View file
|
|
|
monty_api/__pycache__/runtime_context.cpython-314.pyc
ADDED
|
Binary file (19.3 kB). View file
|
|
|
monty_api/__pycache__/runtime_envelopes.cpython-313.pyc
ADDED
|
Binary file (10.2 kB). View file
|
|
|
monty_api/__pycache__/runtime_envelopes.cpython-314.pyc
ADDED
|
Binary file (12 kB). View file
|
|
|
monty_api/__pycache__/runtime_filtering.cpython-313.pyc
ADDED
|
Binary file (9.82 kB). View file
|
|
|
monty_api/__pycache__/runtime_filtering.cpython-314.pyc
ADDED
|
Binary file (11.9 kB). View file
|
|
|
monty_api/__pycache__/tool_entrypoints.cpython-313.pyc
ADDED
|
Binary file (1.81 kB). View file
|
|
|
monty_api/__pycache__/tool_entrypoints.cpython-314.pyc
ADDED
|
Binary file (2.03 kB). View file
|
|
|
monty_api/__pycache__/validation.cpython-313.pyc
ADDED
|
Binary file (16.8 kB). View file
|
|
|
monty_api/__pycache__/validation.cpython-314.pyc
ADDED
|
Binary file (19.6 kB). View file
|
|
|
monty_api/constants.py
CHANGED
|
@@ -183,24 +183,22 @@ COLLECTION_CANONICAL_FIELDS: tuple[str, ...] = (
|
|
| 183 |
"item_count",
|
| 184 |
)
|
| 185 |
|
| 186 |
-
|
| 187 |
"paper_id",
|
| 188 |
"title",
|
| 189 |
"summary",
|
| 190 |
"published_at",
|
| 191 |
-
"
|
| 192 |
"authors",
|
| 193 |
-
"author_usernames",
|
| 194 |
"organization",
|
| 195 |
"submitted_by",
|
| 196 |
"discussion_id",
|
| 197 |
"upvotes",
|
| 198 |
-
"
|
| 199 |
-
"comments",
|
| 200 |
-
"project_page",
|
| 201 |
-
"github_repo",
|
| 202 |
"github_stars",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
"rank",
|
| 204 |
)
|
| 205 |
-
|
| 206 |
-
PAPER_CONTENT_FIELDS: tuple[str, ...] = ("paper_id", "content")
|
|
|
|
| 183 |
"item_count",
|
| 184 |
)
|
| 185 |
|
| 186 |
+
DAILY_PAPER_CANONICAL_FIELDS: tuple[str, ...] = (
|
| 187 |
"paper_id",
|
| 188 |
"title",
|
| 189 |
"summary",
|
| 190 |
"published_at",
|
| 191 |
+
"submitted_on_daily_at",
|
| 192 |
"authors",
|
|
|
|
| 193 |
"organization",
|
| 194 |
"submitted_by",
|
| 195 |
"discussion_id",
|
| 196 |
"upvotes",
|
| 197 |
+
"github_repo_url",
|
|
|
|
|
|
|
|
|
|
| 198 |
"github_stars",
|
| 199 |
+
"project_page_url",
|
| 200 |
+
"num_comments",
|
| 201 |
+
"is_author_participating",
|
| 202 |
+
"repo_id",
|
| 203 |
"rank",
|
| 204 |
)
|
|
|
|
|
|
monty_api/helper_contracts.py
CHANGED
|
@@ -16,10 +16,9 @@ from .constants import (
|
|
| 16 |
ACTIVITY_CANONICAL_FIELDS,
|
| 17 |
ACTOR_CANONICAL_FIELDS,
|
| 18 |
COLLECTION_CANONICAL_FIELDS,
|
|
|
|
| 19 |
DISCUSSION_CANONICAL_FIELDS,
|
| 20 |
DISCUSSION_DETAIL_CANONICAL_FIELDS,
|
| 21 |
-
PAPER_CANONICAL_FIELDS,
|
| 22 |
-
PAPER_CONTENT_FIELDS,
|
| 23 |
PROFILE_CANONICAL_FIELDS,
|
| 24 |
REPO_CANONICAL_FIELDS,
|
| 25 |
USER_CANONICAL_FIELDS,
|
|
@@ -77,10 +76,9 @@ FIELD_GROUPS: dict[str, list[str]] = {
|
|
| 77 |
"activity": list(ACTIVITY_CANONICAL_FIELDS),
|
| 78 |
"actor": list(ACTOR_CANONICAL_FIELDS),
|
| 79 |
"collection": list(COLLECTION_CANONICAL_FIELDS),
|
|
|
|
| 80 |
"discussion": list(DISCUSSION_CANONICAL_FIELDS),
|
| 81 |
"discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
|
| 82 |
-
"paper": list(PAPER_CANONICAL_FIELDS),
|
| 83 |
-
"paper_content": list(PAPER_CONTENT_FIELDS),
|
| 84 |
"profile": list(PROFILE_CANONICAL_FIELDS),
|
| 85 |
"repo": list(REPO_CANONICAL_FIELDS),
|
| 86 |
"trending_repo": list(TRENDING_CANONICAL_FIELDS),
|
|
@@ -111,12 +109,10 @@ HELPER_CONTRACT_SPECS: dict[str, dict[str, Any]] = {
|
|
| 111 |
},
|
| 112 |
"hf_daily_papers": {
|
| 113 |
"category": "curated_feed",
|
| 114 |
-
"row_type": "
|
| 115 |
-
"fields_group": "
|
| 116 |
"filter_param": "where",
|
| 117 |
-
"filter_group": "
|
| 118 |
-
"param_values": {"sort": ["published_at", "trending"]},
|
| 119 |
-
"backed_by": "HfApi.list_daily_papers",
|
| 120 |
},
|
| 121 |
"hf_datasets_search": {
|
| 122 |
"category": "wrapped_hf_repo_search",
|
|
@@ -146,20 +142,6 @@ HELPER_CONTRACT_SPECS: dict[str, dict[str, Any]] = {
|
|
| 146 |
"row_type": "profile",
|
| 147 |
"param_values": {"include": ["likes", "activity"]},
|
| 148 |
},
|
| 149 |
-
"hf_paper_info": {
|
| 150 |
-
"category": "paper_detail",
|
| 151 |
-
"row_type": "paper",
|
| 152 |
-
"fields_group": "paper",
|
| 153 |
-
"backed_by": "HfApi.paper_info",
|
| 154 |
-
},
|
| 155 |
-
"hf_papers_search": {
|
| 156 |
-
"category": "paper_search",
|
| 157 |
-
"row_type": "paper",
|
| 158 |
-
"fields_group": "paper",
|
| 159 |
-
"filter_param": "where",
|
| 160 |
-
"filter_group": "paper",
|
| 161 |
-
"backed_by": "HfApi.list_papers",
|
| 162 |
-
},
|
| 163 |
"hf_recent_activity": {
|
| 164 |
"category": "activity_feed",
|
| 165 |
"row_type": "activity",
|
|
@@ -207,12 +189,6 @@ HELPER_CONTRACT_SPECS: dict[str, dict[str, Any]] = {
|
|
| 207 |
"row_type": "runtime_capability",
|
| 208 |
"param_values": {"section": list(RUNTIME_CAPABILITY_SECTION_VALUES)},
|
| 209 |
},
|
| 210 |
-
"hf_read_paper": {
|
| 211 |
-
"category": "paper_markdown",
|
| 212 |
-
"row_type": "paper_content",
|
| 213 |
-
"fields_group": "paper_content",
|
| 214 |
-
"backed_by": "HfApi.read_paper",
|
| 215 |
-
},
|
| 216 |
"hf_spaces_search": {
|
| 217 |
"category": "wrapped_hf_repo_search",
|
| 218 |
"row_type": "repo",
|
|
@@ -420,9 +396,6 @@ def build_helper_contracts(
|
|
| 420 |
param_values = _param_values_for_helper(helper_name)
|
| 421 |
if param_values is not None:
|
| 422 |
contract["param_values"] = param_values
|
| 423 |
-
backed_by = spec.get("backed_by")
|
| 424 |
-
if isinstance(backed_by, str):
|
| 425 |
-
contract["backed_by"] = backed_by
|
| 426 |
|
| 427 |
upstream_repo_type = spec.get("upstream_repo_type")
|
| 428 |
if isinstance(upstream_repo_type, str):
|
|
|
|
| 16 |
ACTIVITY_CANONICAL_FIELDS,
|
| 17 |
ACTOR_CANONICAL_FIELDS,
|
| 18 |
COLLECTION_CANONICAL_FIELDS,
|
| 19 |
+
DAILY_PAPER_CANONICAL_FIELDS,
|
| 20 |
DISCUSSION_CANONICAL_FIELDS,
|
| 21 |
DISCUSSION_DETAIL_CANONICAL_FIELDS,
|
|
|
|
|
|
|
| 22 |
PROFILE_CANONICAL_FIELDS,
|
| 23 |
REPO_CANONICAL_FIELDS,
|
| 24 |
USER_CANONICAL_FIELDS,
|
|
|
|
| 76 |
"activity": list(ACTIVITY_CANONICAL_FIELDS),
|
| 77 |
"actor": list(ACTOR_CANONICAL_FIELDS),
|
| 78 |
"collection": list(COLLECTION_CANONICAL_FIELDS),
|
| 79 |
+
"daily_paper": list(DAILY_PAPER_CANONICAL_FIELDS),
|
| 80 |
"discussion": list(DISCUSSION_CANONICAL_FIELDS),
|
| 81 |
"discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
|
|
|
|
|
|
|
| 82 |
"profile": list(PROFILE_CANONICAL_FIELDS),
|
| 83 |
"repo": list(REPO_CANONICAL_FIELDS),
|
| 84 |
"trending_repo": list(TRENDING_CANONICAL_FIELDS),
|
|
|
|
| 109 |
},
|
| 110 |
"hf_daily_papers": {
|
| 111 |
"category": "curated_feed",
|
| 112 |
+
"row_type": "daily_paper",
|
| 113 |
+
"fields_group": "daily_paper",
|
| 114 |
"filter_param": "where",
|
| 115 |
+
"filter_group": "daily_paper",
|
|
|
|
|
|
|
| 116 |
},
|
| 117 |
"hf_datasets_search": {
|
| 118 |
"category": "wrapped_hf_repo_search",
|
|
|
|
| 142 |
"row_type": "profile",
|
| 143 |
"param_values": {"include": ["likes", "activity"]},
|
| 144 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
"hf_recent_activity": {
|
| 146 |
"category": "activity_feed",
|
| 147 |
"row_type": "activity",
|
|
|
|
| 189 |
"row_type": "runtime_capability",
|
| 190 |
"param_values": {"section": list(RUNTIME_CAPABILITY_SECTION_VALUES)},
|
| 191 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
"hf_spaces_search": {
|
| 193 |
"category": "wrapped_hf_repo_search",
|
| 194 |
"row_type": "repo",
|
|
|
|
| 396 |
param_values = _param_values_for_helper(helper_name)
|
| 397 |
if param_values is not None:
|
| 398 |
contract["param_values"] = param_values
|
|
|
|
|
|
|
|
|
|
| 399 |
|
| 400 |
upstream_repo_type = spec.get("upstream_repo_type")
|
| 401 |
if isinstance(upstream_repo_type, str):
|
monty_api/helpers/__init__.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
from .activity import register_activity_helpers
|
| 2 |
from .collections import register_collection_helpers
|
| 3 |
from .introspection import register_introspection_helpers
|
| 4 |
-
from .papers import register_paper_helpers
|
| 5 |
from .profiles import register_profile_helpers
|
| 6 |
from .repos import register_repo_helpers
|
| 7 |
|
|
@@ -9,7 +8,6 @@ __all__ = [
|
|
| 9 |
"register_activity_helpers",
|
| 10 |
"register_collection_helpers",
|
| 11 |
"register_introspection_helpers",
|
| 12 |
-
"register_paper_helpers",
|
| 13 |
"register_profile_helpers",
|
| 14 |
"register_repo_helpers",
|
| 15 |
]
|
|
|
|
| 1 |
from .activity import register_activity_helpers
|
| 2 |
from .collections import register_collection_helpers
|
| 3 |
from .introspection import register_introspection_helpers
|
|
|
|
| 4 |
from .profiles import register_profile_helpers
|
| 5 |
from .repos import register_repo_helpers
|
| 6 |
|
|
|
|
| 8 |
"register_activity_helpers",
|
| 9 |
"register_collection_helpers",
|
| 10 |
"register_introspection_helpers",
|
|
|
|
| 11 |
"register_profile_helpers",
|
| 12 |
"register_repo_helpers",
|
| 13 |
]
|
monty_api/helpers/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (487 Bytes). View file
|
|
|
monty_api/helpers/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (489 Bytes). View file
|
|
|
monty_api/helpers/__pycache__/activity.cpython-313.pyc
ADDED
|
Binary file (8.71 kB). View file
|
|
|
monty_api/helpers/__pycache__/activity.cpython-314.pyc
ADDED
|
Binary file (9.3 kB). View file
|
|
|
monty_api/helpers/__pycache__/collections.cpython-313.pyc
ADDED
|
Binary file (12.7 kB). View file
|
|
|
monty_api/helpers/__pycache__/collections.cpython-314.pyc
ADDED
|
Binary file (13.8 kB). View file
|
|
|
monty_api/helpers/__pycache__/common.cpython-313.pyc
ADDED
|
Binary file (1.5 kB). View file
|
|
|
monty_api/helpers/__pycache__/common.cpython-314.pyc
ADDED
|
Binary file (1.64 kB). View file
|
|
|
monty_api/helpers/__pycache__/introspection.cpython-313.pyc
ADDED
|
Binary file (11.1 kB). View file
|
|
|
monty_api/helpers/__pycache__/introspection.cpython-314.pyc
ADDED
|
Binary file (12.4 kB). View file
|
|
|
monty_api/helpers/__pycache__/profiles.cpython-313.pyc
ADDED
|
Binary file (32.7 kB). View file
|
|
|
monty_api/helpers/__pycache__/profiles.cpython-314.pyc
ADDED
|
Binary file (35.3 kB). View file
|
|
|
monty_api/helpers/__pycache__/repos.cpython-313.pyc
ADDED
|
Binary file (49.5 kB). View file
|
|
|
monty_api/helpers/__pycache__/repos.cpython-314.pyc
ADDED
|
Binary file (53.5 kB). View file
|
|
|
monty_api/helpers/introspection.py
CHANGED
|
@@ -10,6 +10,7 @@ from ..constants import (
|
|
| 10 |
ACTIVITY_CANONICAL_FIELDS,
|
| 11 |
ACTOR_CANONICAL_FIELDS,
|
| 12 |
COLLECTION_CANONICAL_FIELDS,
|
|
|
|
| 13 |
DISCUSSION_CANONICAL_FIELDS,
|
| 14 |
DISCUSSION_DETAIL_CANONICAL_FIELDS,
|
| 15 |
DEFAULT_MAX_CALLS,
|
|
@@ -18,8 +19,6 @@ from ..constants import (
|
|
| 18 |
LIKES_SCAN_LIMIT_CAP,
|
| 19 |
MAX_CALLS_LIMIT,
|
| 20 |
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 21 |
-
PAPER_CANONICAL_FIELDS,
|
| 22 |
-
PAPER_CONTENT_FIELDS,
|
| 23 |
PROFILE_CANONICAL_FIELDS,
|
| 24 |
RECENT_ACTIVITY_SCAN_MAX_PAGES,
|
| 25 |
REPO_CANONICAL_FIELDS,
|
|
@@ -141,8 +140,7 @@ async def hf_runtime_capabilities(
|
|
| 141 |
"user_likes": list(USER_LIKES_CANONICAL_FIELDS),
|
| 142 |
"activity": list(ACTIVITY_CANONICAL_FIELDS),
|
| 143 |
"collection": list(COLLECTION_CANONICAL_FIELDS),
|
| 144 |
-
"
|
| 145 |
-
"paper_content": list(PAPER_CONTENT_FIELDS),
|
| 146 |
"discussion": list(DISCUSSION_CANONICAL_FIELDS),
|
| 147 |
"discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
|
| 148 |
},
|
|
|
|
| 10 |
ACTIVITY_CANONICAL_FIELDS,
|
| 11 |
ACTOR_CANONICAL_FIELDS,
|
| 12 |
COLLECTION_CANONICAL_FIELDS,
|
| 13 |
+
DAILY_PAPER_CANONICAL_FIELDS,
|
| 14 |
DISCUSSION_CANONICAL_FIELDS,
|
| 15 |
DISCUSSION_DETAIL_CANONICAL_FIELDS,
|
| 16 |
DEFAULT_MAX_CALLS,
|
|
|
|
| 19 |
LIKES_SCAN_LIMIT_CAP,
|
| 20 |
MAX_CALLS_LIMIT,
|
| 21 |
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
|
|
|
|
|
|
| 22 |
PROFILE_CANONICAL_FIELDS,
|
| 23 |
RECENT_ACTIVITY_SCAN_MAX_PAGES,
|
| 24 |
REPO_CANONICAL_FIELDS,
|
|
|
|
| 140 |
"user_likes": list(USER_LIKES_CANONICAL_FIELDS),
|
| 141 |
"activity": list(ACTIVITY_CANONICAL_FIELDS),
|
| 142 |
"collection": list(COLLECTION_CANONICAL_FIELDS),
|
| 143 |
+
"daily_paper": list(DAILY_PAPER_CANONICAL_FIELDS),
|
|
|
|
| 144 |
"discussion": list(DISCUSSION_CANONICAL_FIELDS),
|
| 145 |
"discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
|
| 146 |
},
|
monty_api/helpers/profiles.py
CHANGED
|
@@ -338,8 +338,8 @@ async def hf_org_members(
|
|
| 338 |
)
|
| 339 |
sample_complete = (
|
| 340 |
exact_count
|
| 341 |
-
and
|
| 342 |
-
and (not count_only or
|
| 343 |
)
|
| 344 |
more_available = ctx._derive_more_available(
|
| 345 |
sample_complete=sample_complete,
|
|
@@ -372,18 +372,13 @@ async def hf_org_members(
|
|
| 372 |
"organization": org,
|
| 373 |
},
|
| 374 |
limit_plan=limit_plan,
|
| 375 |
-
matched_count=
|
| 376 |
returned_count=len(items),
|
| 377 |
exact_count=exact_count,
|
| 378 |
count_only=count_only,
|
| 379 |
sample_complete=sample_complete,
|
| 380 |
more_available=more_available,
|
| 381 |
-
scan_limit_hit=scan_limit_hit
|
| 382 |
-
or (
|
| 383 |
-
overview_total is not None
|
| 384 |
-
and overview_total > observed_total
|
| 385 |
-
and observed_total >= scan_lim
|
| 386 |
-
),
|
| 387 |
)
|
| 388 |
return ctx._helper_success(
|
| 389 |
start_calls=start_calls, source=endpoint, items=items, meta=meta
|
|
@@ -578,8 +573,8 @@ async def _user_graph_helper(
|
|
| 578 |
)
|
| 579 |
sample_complete = (
|
| 580 |
exact_count
|
| 581 |
-
and
|
| 582 |
-
and (not count_only or
|
| 583 |
)
|
| 584 |
more_available = ctx._derive_more_available(
|
| 585 |
sample_complete=sample_complete,
|
|
@@ -622,18 +617,13 @@ async def _user_graph_helper(
|
|
| 622 |
"organization": u if entity_type == "organization" else None,
|
| 623 |
},
|
| 624 |
limit_plan=limit_plan,
|
| 625 |
-
matched_count=
|
| 626 |
returned_count=len(items),
|
| 627 |
exact_count=exact_count,
|
| 628 |
count_only=count_only,
|
| 629 |
sample_complete=sample_complete,
|
| 630 |
more_available=more_available,
|
| 631 |
-
scan_limit_hit=scan_limit_hit
|
| 632 |
-
or (
|
| 633 |
-
overview_total is not None
|
| 634 |
-
and overview_total > observed_total
|
| 635 |
-
and observed_total >= scan_lim
|
| 636 |
-
),
|
| 637 |
)
|
| 638 |
return ctx._helper_success(
|
| 639 |
start_calls=start_calls, source=endpoint, items=items, meta=meta
|
|
|
|
| 338 |
)
|
| 339 |
sample_complete = (
|
| 340 |
exact_count
|
| 341 |
+
and len(normalized) <= applied_limit
|
| 342 |
+
and (not count_only or len(normalized) == 0)
|
| 343 |
)
|
| 344 |
more_available = ctx._derive_more_available(
|
| 345 |
sample_complete=sample_complete,
|
|
|
|
| 372 |
"organization": org,
|
| 373 |
},
|
| 374 |
limit_plan=limit_plan,
|
| 375 |
+
matched_count=len(normalized),
|
| 376 |
returned_count=len(items),
|
| 377 |
exact_count=exact_count,
|
| 378 |
count_only=count_only,
|
| 379 |
sample_complete=sample_complete,
|
| 380 |
more_available=more_available,
|
| 381 |
+
scan_limit_hit=scan_limit_hit,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 382 |
)
|
| 383 |
return ctx._helper_success(
|
| 384 |
start_calls=start_calls, source=endpoint, items=items, meta=meta
|
|
|
|
| 573 |
)
|
| 574 |
sample_complete = (
|
| 575 |
exact_count
|
| 576 |
+
and len(normalized) <= applied_limit
|
| 577 |
+
and (not count_only or len(normalized) == 0)
|
| 578 |
)
|
| 579 |
more_available = ctx._derive_more_available(
|
| 580 |
sample_complete=sample_complete,
|
|
|
|
| 617 |
"organization": u if entity_type == "organization" else None,
|
| 618 |
},
|
| 619 |
limit_plan=limit_plan,
|
| 620 |
+
matched_count=len(normalized),
|
| 621 |
returned_count=len(items),
|
| 622 |
exact_count=exact_count,
|
| 623 |
count_only=count_only,
|
| 624 |
sample_complete=sample_complete,
|
| 625 |
more_available=more_available,
|
| 626 |
+
scan_limit_hit=scan_limit_hit,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 627 |
)
|
| 628 |
return ctx._helper_success(
|
| 629 |
start_calls=start_calls, source=endpoint, items=items, meta=meta
|
monty_api/helpers/repos.py
CHANGED
|
@@ -7,6 +7,7 @@ from ..context_types import HelperRuntimeContext
|
|
| 7 |
from ..helper_contracts import repo_expand_alias_map
|
| 8 |
from ..constants import (
|
| 9 |
ACTOR_CANONICAL_FIELDS,
|
|
|
|
| 10 |
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 11 |
LIKES_ENRICHMENT_MAX_REPOS,
|
| 12 |
LIKES_RANKING_WINDOW_DEFAULT,
|
|
@@ -122,6 +123,9 @@ def _build_repo_search_extra_args(
|
|
| 122 |
if value:
|
| 123 |
normalized["cardData"] = True
|
| 124 |
continue
|
|
|
|
|
|
|
|
|
|
| 125 |
if key in {"fetch_config", "linked"}:
|
| 126 |
if value:
|
| 127 |
normalized[key] = True
|
|
@@ -179,7 +183,7 @@ async def _run_repo_search(
|
|
| 179 |
extra_args_by_type: dict[str, dict[str, Any]] | None = None,
|
| 180 |
) -> dict[str, Any]:
|
| 181 |
start_calls = ctx.call_count["n"]
|
| 182 |
-
default_limit = ctx._policy_int(helper_name, "default_limit",
|
| 183 |
max_limit = ctx._policy_int(
|
| 184 |
helper_name, "max_limit", SELECTIVE_ENDPOINT_RETURN_HARD_CAP
|
| 185 |
)
|
|
@@ -339,9 +343,10 @@ async def hf_models_search(
|
|
| 339 |
model_name: str | None = None,
|
| 340 |
trained_dataset: str | list[str] | None = None,
|
| 341 |
pipeline_tag: str | None = None,
|
|
|
|
| 342 |
emissions_thresholds: tuple[float, float] | None = None,
|
| 343 |
sort: str | None = None,
|
| 344 |
-
limit: int =
|
| 345 |
expand: list[str] | None = None,
|
| 346 |
full: bool | None = None,
|
| 347 |
card_data: bool = False,
|
|
@@ -369,6 +374,7 @@ async def hf_models_search(
|
|
| 369 |
"model_name": model_name,
|
| 370 |
"trained_dataset": trained_dataset,
|
| 371 |
"pipeline_tag": pipeline_tag,
|
|
|
|
| 372 |
"emissions_thresholds": emissions_thresholds,
|
| 373 |
"expand": expand,
|
| 374 |
"full": full,
|
|
@@ -394,7 +400,7 @@ async def hf_datasets_search(
|
|
| 394 |
task_categories: str | list[str] | None = None,
|
| 395 |
task_ids: str | list[str] | None = None,
|
| 396 |
sort: str | None = None,
|
| 397 |
-
limit: int =
|
| 398 |
expand: list[str] | None = None,
|
| 399 |
full: bool | None = None,
|
| 400 |
fields: list[str] | None = None,
|
|
@@ -438,7 +444,7 @@ async def hf_spaces_search(
|
|
| 438 |
models: str | list[str] | None = None,
|
| 439 |
linked: bool = False,
|
| 440 |
sort: str | None = None,
|
| 441 |
-
limit: int =
|
| 442 |
expand: list[str] | None = None,
|
| 443 |
full: bool | None = None,
|
| 444 |
fields: list[str] | None = None,
|
|
@@ -475,7 +481,7 @@ async def hf_repo_search(
|
|
| 475 |
filter: str | list[str] | None = None,
|
| 476 |
author: str | None = None,
|
| 477 |
sort: str | None = None,
|
| 478 |
-
limit: int =
|
| 479 |
fields: list[str] | None = None,
|
| 480 |
post_filter: dict[str, Any] | None = None,
|
| 481 |
) -> dict[str, Any]:
|
|
@@ -1286,6 +1292,62 @@ async def hf_trending(
|
|
| 1286 |
)
|
| 1287 |
|
| 1288 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1289 |
def register_repo_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[..., Any]]:
|
| 1290 |
return {
|
| 1291 |
"hf_models_search": partial(hf_models_search, ctx),
|
|
@@ -1298,4 +1360,5 @@ def register_repo_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[...,
|
|
| 1298 |
"hf_repo_discussion_details": partial(hf_repo_discussion_details, ctx),
|
| 1299 |
"hf_repo_details": partial(hf_repo_details, ctx),
|
| 1300 |
"hf_trending": partial(hf_trending, ctx),
|
|
|
|
| 1301 |
}
|
|
|
|
| 7 |
from ..helper_contracts import repo_expand_alias_map
|
| 8 |
from ..constants import (
|
| 9 |
ACTOR_CANONICAL_FIELDS,
|
| 10 |
+
DAILY_PAPER_CANONICAL_FIELDS,
|
| 11 |
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 12 |
LIKES_ENRICHMENT_MAX_REPOS,
|
| 13 |
LIKES_RANKING_WINDOW_DEFAULT,
|
|
|
|
| 123 |
if value:
|
| 124 |
normalized["cardData"] = True
|
| 125 |
continue
|
| 126 |
+
if key in {"num_params", "num_parameters"}:
|
| 127 |
+
normalized["num_parameters"] = value
|
| 128 |
+
continue
|
| 129 |
if key in {"fetch_config", "linked"}:
|
| 130 |
if value:
|
| 131 |
normalized[key] = True
|
|
|
|
| 183 |
extra_args_by_type: dict[str, dict[str, Any]] | None = None,
|
| 184 |
) -> dict[str, Any]:
|
| 185 |
start_calls = ctx.call_count["n"]
|
| 186 |
+
default_limit = ctx._policy_int(helper_name, "default_limit", 100)
|
| 187 |
max_limit = ctx._policy_int(
|
| 188 |
helper_name, "max_limit", SELECTIVE_ENDPOINT_RETURN_HARD_CAP
|
| 189 |
)
|
|
|
|
| 343 |
model_name: str | None = None,
|
| 344 |
trained_dataset: str | list[str] | None = None,
|
| 345 |
pipeline_tag: str | None = None,
|
| 346 |
+
num_params: str | None = None,
|
| 347 |
emissions_thresholds: tuple[float, float] | None = None,
|
| 348 |
sort: str | None = None,
|
| 349 |
+
limit: int = 100,
|
| 350 |
expand: list[str] | None = None,
|
| 351 |
full: bool | None = None,
|
| 352 |
card_data: bool = False,
|
|
|
|
| 374 |
"model_name": model_name,
|
| 375 |
"trained_dataset": trained_dataset,
|
| 376 |
"pipeline_tag": pipeline_tag,
|
| 377 |
+
"num_params": num_params,
|
| 378 |
"emissions_thresholds": emissions_thresholds,
|
| 379 |
"expand": expand,
|
| 380 |
"full": full,
|
|
|
|
| 400 |
task_categories: str | list[str] | None = None,
|
| 401 |
task_ids: str | list[str] | None = None,
|
| 402 |
sort: str | None = None,
|
| 403 |
+
limit: int = 100,
|
| 404 |
expand: list[str] | None = None,
|
| 405 |
full: bool | None = None,
|
| 406 |
fields: list[str] | None = None,
|
|
|
|
| 444 |
models: str | list[str] | None = None,
|
| 445 |
linked: bool = False,
|
| 446 |
sort: str | None = None,
|
| 447 |
+
limit: int = 100,
|
| 448 |
expand: list[str] | None = None,
|
| 449 |
full: bool | None = None,
|
| 450 |
fields: list[str] | None = None,
|
|
|
|
| 481 |
filter: str | list[str] | None = None,
|
| 482 |
author: str | None = None,
|
| 483 |
sort: str | None = None,
|
| 484 |
+
limit: int = 100,
|
| 485 |
fields: list[str] | None = None,
|
| 486 |
post_filter: dict[str, Any] | None = None,
|
| 487 |
) -> dict[str, Any]:
|
|
|
|
| 1292 |
)
|
| 1293 |
|
| 1294 |
|
| 1295 |
+
async def hf_daily_papers(
    ctx: HelperRuntimeContext,
    limit: int = 20,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Return a ranked, optionally filtered and projected daily-papers listing.

    Args:
        ctx: Runtime context supplying the policy lookups, the host call and
            the normalization / envelope helpers used below.
        limit: Requested row count. Clamped through ``ctx._clamp_int`` to
            ``[1, max_limit]``, with the policy ``default_limit`` as default.
        where: Optional filter handed to ``ctx._apply_where``; only
            ``DAILY_PAPER_CANONICAL_FIELDS`` are allowed as filter fields.
        fields: Optional projection handed to
            ``ctx._project_daily_paper_items``.

    Returns:
        The ``ctx._helper_success`` envelope on success, or the
        ``ctx._helper_error`` envelope when the host call reports a failure
        or when filtering / projection raises ``ValueError``.
    """
    endpoint = "/api/daily_papers"
    calls_before = ctx.call_count["n"]

    def _fail(error: Any) -> dict[str, Any]:
        # Every failure path reports the same call baseline and source.
        return ctx._helper_error(
            start_calls=calls_before,
            source=endpoint,
            error=error,
        )

    fallback_limit = ctx._policy_int("hf_daily_papers", "default_limit", 20)
    ceiling = ctx._policy_int(
        "hf_daily_papers", "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
    )
    lim = ctx._clamp_int(limit, default=fallback_limit, minimum=1, maximum=ceiling)

    response = ctx._host_raw_call(endpoint, params={"limit": lim})
    if not response.get("ok"):
        return _fail(response.get("error") or "daily papers fetch failed")

    # Anything other than a list payload is treated as "no rows".
    data = response.get("data")
    rows = data if isinstance(data, list) else []

    # Rank is the 1-based position within the fetched slice, so non-dict rows
    # that get skipped leave gaps in the rank sequence.
    ranked: list[dict[str, Any]] = [
        ctx._normalize_daily_paper_row(row, rank=position)
        for position, row in enumerate(rows[:lim], start=1)
        if isinstance(row, dict)
    ]

    try:
        filtered = ctx._apply_where(
            ranked, where, allowed_fields=DAILY_PAPER_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return _fail(exc)

    # Count matches before projection so ``matched`` reflects the filter only.
    match_count = len(filtered)

    try:
        projected = ctx._project_daily_paper_items(filtered[:lim], fields)
    except ValueError as exc:
        return _fail(exc)

    return ctx._helper_success(
        start_calls=calls_before,
        source=endpoint,
        items=projected,
        limit=lim,
        scanned=len(rows),
        matched=match_count,
        returned=len(projected),
        ordered_ranking=True,
    )
|
| 1349 |
+
|
| 1350 |
+
|
| 1351 |
def register_repo_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[..., Any]]:
|
| 1352 |
return {
|
| 1353 |
"hf_models_search": partial(hf_models_search, ctx),
|
|
|
|
| 1360 |
"hf_repo_discussion_details": partial(hf_repo_discussion_details, ctx),
|
| 1361 |
"hf_repo_details": partial(hf_repo_details, ctx),
|
| 1362 |
"hf_trending": partial(hf_trending, ctx),
|
| 1363 |
+
"hf_daily_papers": partial(hf_daily_papers, ctx),
|
| 1364 |
}
|