Spaces:
Running
Running
Publish collection owner lookup fix
Browse files
- Dockerfile +1 -1
- _monty_codegen_shared.md +167 -436
- hf-hub-query.md +1 -1
- monty_api/__pycache__/aliases.cpython-313.pyc +0 -0
- monty_api/__pycache__/constants.cpython-313.pyc +0 -0
- monty_api/__pycache__/http_runtime.cpython-313.pyc +0 -0
- monty_api/__pycache__/registry.cpython-313.pyc +0 -0
- monty_api/__pycache__/runtime_context.cpython-313.pyc +0 -0
- monty_api/__pycache__/runtime_envelopes.cpython-313.pyc +0 -0
- monty_api/__pycache__/runtime_filtering.cpython-313.pyc +0 -0
- monty_api/__pycache__/validation.cpython-313.pyc +0 -0
- monty_api/aliases.py +0 -90
- monty_api/constants.py +40 -3
- monty_api/helpers/__pycache__/activity.cpython-313.pyc +0 -0
- monty_api/helpers/__pycache__/collections.cpython-313.pyc +0 -0
- monty_api/helpers/__pycache__/introspection.cpython-313.pyc +0 -0
- monty_api/helpers/__pycache__/profiles.cpython-313.pyc +0 -0
- monty_api/helpers/__pycache__/repos.cpython-313.pyc +0 -0
- monty_api/helpers/activity.py +28 -12
- monty_api/helpers/collections.py +112 -29
- monty_api/helpers/introspection.py +131 -59
- monty_api/helpers/profiles.py +92 -51
- monty_api/helpers/repos.py +513 -252
- monty_api/http_runtime.py +9 -5
- monty_api/registry.py +74 -69
- monty_api/runtime_context.py +4 -0
- monty_api/runtime_envelopes.py +35 -36
- monty_api/runtime_filtering.py +65 -32
- monty_api/validation.py +2 -2
Dockerfile
CHANGED
|
@@ -15,7 +15,7 @@ COPY wheels /tmp/wheels
|
|
| 15 |
RUN uv pip install --system --no-cache \
|
| 16 |
"fast-agent-mcp>=0.6.1" \
|
| 17 |
huggingface_hub \
|
| 18 |
-
|
| 19 |
|
| 20 |
COPY --link ./ /app
|
| 21 |
RUN chown -R 1000:1000 /app
|
|
|
|
| 15 |
RUN uv pip install --system --no-cache \
|
| 16 |
"fast-agent-mcp>=0.6.1" \
|
| 17 |
huggingface_hub \
|
| 18 |
+
"pydantic-monty==0.0.8"
|
| 19 |
|
| 20 |
COPY --link ./ /app
|
| 21 |
RUN chown -R 1000:1000 /app
|
_monty_codegen_shared.md
CHANGED
|
@@ -1,49 +1,146 @@
|
|
| 1 |
-
##
|
| 2 |
|
| 3 |
-
- You
|
| 4 |
-
-
|
| 5 |
-
- All helper
|
| 6 |
-
-
|
| 7 |
-
- The outer wrapper is an exact contract. You **MUST** use this exact skeleton and only change the body:
|
| 8 |
|
| 9 |
```py
|
| 10 |
async def solve(query, max_calls):
|
| 11 |
...
|
| 12 |
-
# body goes here
|
| 13 |
|
| 14 |
await solve(query, max_calls)
|
| 15 |
```
|
| 16 |
|
| 17 |
-
-
|
| 18 |
-
-
|
| 19 |
-
-
|
| 20 |
-
-
|
| 21 |
-
-
|
| 22 |
-
-
|
| 23 |
-
-
|
| 24 |
-
- If
|
| 25 |
-
- For current-user prompts (`my`, `me`),
|
| 26 |
-
-
|
| 27 |
-
|
| 28 |
-
##
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
-
|
| 33 |
-
- `hf_trending(...)` uses `limit`, **not** `return_limit`.
|
| 34 |
-
- `hf_daily_papers(...)` uses `limit`, **not** `return_limit`.
|
| 35 |
-
- `hf_repo_discussions(...)` uses `limit`, **not** `return_limit`, and does **not** accept `fields`.
|
| 36 |
-
- `hf_user_graph(...)`, `hf_user_likes(...)`, `hf_org_members(...)`, `hf_recent_activity(...)`, and `hf_collection_items(...)` use `return_limit`.
|
| 37 |
- `hf_profile_summary(include=...)` supports only `"likes"` and `"activity"`.
|
| 38 |
-
-
|
| 39 |
-
-
|
| 40 |
-
-
|
| 41 |
-
-
|
| 42 |
-
-
|
| 43 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
## Helper result shape
|
|
|
|
| 46 |
All helpers return:
|
|
|
|
| 47 |
```py
|
| 48 |
{
|
| 49 |
"ok": bool,
|
|
@@ -56,427 +153,61 @@ All helpers return:
|
|
| 56 |
|
| 57 |
Rules:
|
| 58 |
- `items` is the canonical list field.
|
| 59 |
-
- `item` is
|
| 60 |
-
- `meta` contains helper-owned execution,
|
| 61 |
-
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
-
## Helper API
|
| 65 |
```py
|
| 66 |
-
await
|
| 67 |
|
| 68 |
-
await
|
| 69 |
-
handle: str | None = None,
|
| 70 |
-
include: list[str] | None = None,
|
| 71 |
-
likes_limit: int = 10,
|
| 72 |
-
activity_limit: int = 10,
|
| 73 |
-
)
|
| 74 |
-
# include supports only: ["likes"], ["activity"], or ["likes", "activity"]
|
| 75 |
-
# aggregate counts like followers_count / following_count / models_count are already in item
|
| 76 |
-
|
| 77 |
-
await hf_org_members(
|
| 78 |
-
organization: str,
|
| 79 |
-
return_limit: int | None = None,
|
| 80 |
-
scan_limit: int | None = None,
|
| 81 |
-
count_only: bool = False,
|
| 82 |
-
where: dict | None = None,
|
| 83 |
-
fields: list[str] | None = None,
|
| 84 |
-
)
|
| 85 |
|
| 86 |
-
await
|
| 87 |
-
query: str | None = None,
|
| 88 |
-
repo_type: str | None = None,
|
| 89 |
-
repo_types: list[str] | None = None,
|
| 90 |
-
author: str | None = None,
|
| 91 |
-
filters: list[str] | None = None,
|
| 92 |
-
sort: str | None = None,
|
| 93 |
-
limit: int = 20,
|
| 94 |
-
where: dict | None = None,
|
| 95 |
-
fields: list[str] | None = None,
|
| 96 |
-
advanced: dict | None = None,
|
| 97 |
-
)
|
| 98 |
-
# hf_repo_search contract:
|
| 99 |
-
# - filters: upstream HF search/tag filters only (not arbitrary returned row fields)
|
| 100 |
-
# - where: local predicate over returned normalized row fields
|
| 101 |
-
# - fields: select which normalized row fields are returned
|
| 102 |
-
# - for Space runtime status, use where={"runtime_stage": ...}, not filters=["state:..."]
|
| 103 |
-
|
| 104 |
-
await hf_repo_details(
|
| 105 |
-
repo_id: str | None = None,
|
| 106 |
-
repo_ids: list[str] | None = None,
|
| 107 |
-
repo_type: str = "auto",
|
| 108 |
-
fields: list[str] | None = None,
|
| 109 |
-
)
|
| 110 |
|
| 111 |
-
await
|
| 112 |
-
repo_type: str = "model",
|
| 113 |
-
limit: int = 20,
|
| 114 |
-
where: dict | None = None,
|
| 115 |
-
fields: list[str] | None = None,
|
| 116 |
-
)
|
| 117 |
|
| 118 |
-
await
|
| 119 |
-
limit: int = 20,
|
| 120 |
-
where: dict | None = None,
|
| 121 |
-
fields: list[str] | None = None,
|
| 122 |
-
)
|
| 123 |
|
| 124 |
-
await
|
| 125 |
-
username: str | None = None,
|
| 126 |
-
relation: str = "followers",
|
| 127 |
-
return_limit: int | None = None,
|
| 128 |
-
scan_limit: int | None = None,
|
| 129 |
-
count_only: bool = False,
|
| 130 |
-
pro_only: bool | None = None,
|
| 131 |
-
where: dict | None = None,
|
| 132 |
-
fields: list[str] | None = None,
|
| 133 |
-
)
|
| 134 |
|
| 135 |
-
await
|
| 136 |
-
repo_id: str,
|
| 137 |
-
repo_type: str,
|
| 138 |
-
return_limit: int | None = None,
|
| 139 |
-
count_only: bool = False,
|
| 140 |
-
pro_only: bool | None = None,
|
| 141 |
-
where: dict | None = None,
|
| 142 |
-
fields: list[str] | None = None,
|
| 143 |
-
)
|
| 144 |
|
| 145 |
-
await
|
| 146 |
-
username: str | None = None,
|
| 147 |
-
repo_types: list[str] | None = None,
|
| 148 |
-
return_limit: int | None = None,
|
| 149 |
-
scan_limit: int | None = None,
|
| 150 |
-
count_only: bool = False,
|
| 151 |
-
where: dict | None = None,
|
| 152 |
-
fields: list[str] | None = None,
|
| 153 |
-
sort: str | None = None,
|
| 154 |
-
ranking_window: int | None = None,
|
| 155 |
-
)
|
| 156 |
|
| 157 |
-
await
|
| 158 |
-
feed_type: str | None = None,
|
| 159 |
-
entity: str | None = None,
|
| 160 |
-
activity_types: list[str] | None = None,
|
| 161 |
-
repo_types: list[str] | None = None,
|
| 162 |
-
return_limit: int | None = None,
|
| 163 |
-
max_pages: int | None = None,
|
| 164 |
-
start_cursor: str | None = None,
|
| 165 |
-
count_only: bool = False,
|
| 166 |
-
where: dict | None = None,
|
| 167 |
-
fields: list[str] | None = None,
|
| 168 |
-
)
|
| 169 |
|
| 170 |
-
await
|
| 171 |
-
await hf_repo_discussion_details(repo_type: str, repo_id: str, discussion_num: int)
|
| 172 |
|
| 173 |
-
await
|
| 174 |
-
query: str | None = None,
|
| 175 |
-
owner: str | None = None,
|
| 176 |
-
return_limit: int = 20,
|
| 177 |
-
count_only: bool = False,
|
| 178 |
-
where: dict | None = None,
|
| 179 |
-
fields: list[str] | None = None,
|
| 180 |
-
)
|
| 181 |
|
| 182 |
-
await
|
| 183 |
-
collection_id: str,
|
| 184 |
-
repo_types: list[str] | None = None,
|
| 185 |
-
return_limit: int = 100,
|
| 186 |
-
count_only: bool = False,
|
| 187 |
-
where: dict | None = None,
|
| 188 |
-
fields: list[str] | None = None,
|
| 189 |
-
)
|
| 190 |
|
| 191 |
-
await
|
| 192 |
-
```
|
| 193 |
|
| 194 |
-
|
| 195 |
-
- Use canonical snake_case sort keys in generated code. Do **not** use camelCase sort names.
|
| 196 |
-
- `hf_repo_search(sort=...)`:
|
| 197 |
-
- model / dataset: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 198 |
-
- space: `created_at`, `last_modified`, `likes`, `trending_score`
|
| 199 |
-
- `hf_user_likes(sort=...)`: `liked_at`, `repo_likes`, `repo_downloads`
|
| 200 |
-
- `hf_user_likes(...)` row keys: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 201 |
-
- `hf_repo_search(advanced=...)` is allowed only when you pass exactly one `repo_type`.
|
| 202 |
-
- `hf_repo_search(advanced=...)` allowed keys:
|
| 203 |
-
- model: `filter`, `apps`, `gated`, `inference`, `inference_provider`, `model_name`, `trained_dataset`, `pipeline_tag`, `emissions_thresholds`, `expand`, `full`, `cardData`, `fetch_config`
|
| 204 |
-
- dataset: `filter`, `benchmark`, `dataset_name`, `gated`, `language_creators`, `language`, `multilinguality`, `size_categories`, `task_categories`, `task_ids`, `expand`, `full`
|
| 205 |
-
- space: `filter`, `datasets`, `models`, `linked`, `expand`, `full`
|
| 206 |
-
- `advanced["expand"]` values are exact strings. Do **not** convert them to snake_case. Use only these values:
|
| 207 |
-
- model: `author`, `baseModels`, `cardData`, `config`, `createdAt`, `disabled`, `downloads`, `downloadsAllTime`, `evalResults`, `gated`, `gguf`, `inference`, `inferenceProviderMapping`, `lastModified`, `library_name`, `likes`, `mask_token`, `model-index`, `pipeline_tag`, `private`, `resourceGroup`, `safetensors`, `sha`, `siblings`, `spaces`, `tags`, `transformersInfo`, `trendingScore`, `widgetData`, `xetEnabled`, `gitalyUid`
|
| 208 |
-
- dataset: `author`, `cardData`, `citation`, `createdAt`, `description`, `disabled`, `downloads`, `downloadsAllTime`, `gated`, `lastModified`, `likes`, `paperswithcode_id`, `private`, `resourceGroup`, `sha`, `siblings`, `tags`, `trendingScore`, `xetEnabled`, `gitalyUid`
|
| 209 |
-
- space: `author`, `cardData`, `createdAt`, `datasets`, `disabled`, `lastModified`, `likes`, `models`, `private`, `resourceGroup`, `runtime`, `sdk`, `sha`, `siblings`, `subdomain`, `tags`, `trendingScore`, `xetEnabled`, `gitalyUid`
|
| 210 |
-
- If a specific expanded field matters to the answer, request it explicitly in `advanced["expand"]`. Do not rely on implicit defaults.
|
| 211 |
-
- `filters` and `where` are **not** interchangeable:
|
| 212 |
-
- `filters` passes upstream HF filter/tag arguments into the Hub client
|
| 213 |
-
- `where` filters the normalized rows returned by this runtime
|
| 214 |
-
- for repo-search questions about upstream fields like `author` or model `pipeline_tag`, push the constraint upstream instead of using `where`
|
| 215 |
-
- if the user asks for a returned field such as Space runtime state/status, prefer `where` over `filters`
|
| 216 |
-
- For model pipeline-tag questions, prefer:
|
| 217 |
-
- `hf_repo_search(repo_type="model", advanced={"pipeline_tag": "text-to-image"}, ...)`
|
| 218 |
-
- **not** `where={"pipeline_tag": "text-to-image"}`
|
| 219 |
-
- For Space runtime state/status questions:
|
| 220 |
-
- the canonical returned field is `runtime_stage`
|
| 221 |
-
- friendly wording like "state" or "status" refers to `runtime_stage`
|
| 222 |
-
- values such as `BUILD_ERROR`, `RUNTIME_ERROR`, `RUNNING`, and `SLEEPING` are runtime stages
|
| 223 |
-
- plain `"ERROR"` is not a canonical stage value; if the user says "error state", treat that as `BUILD_ERROR` and `RUNTIME_ERROR`
|
| 224 |
-
|
| 225 |
-
## Routing guide
|
| 226 |
-
|
| 227 |
-
### Summary vs detail
|
| 228 |
-
- Summary helpers are the default for list/search/trending questions: `hf_repo_search(...)`, `hf_trending(...)`, `hf_daily_papers(...)`, `hf_user_likes(...)`, `hf_recent_activity(...)`, `hf_collections_search(...)`, `hf_collection_items(...)`, `hf_org_members(...)`, `hf_user_graph(...)`.
|
| 229 |
-
- Use `hf_repo_details(...)` when the user needs exact repo metadata rather than a cheap summary row.
|
| 230 |
-
- Do **not** invent follow-up detail calls unless the user explicitly needs fields that are not already available in the current helper response.
|
| 231 |
-
|
| 232 |
-
### Repo questions
|
| 233 |
-
- Exact `owner/name` details → `hf_repo_details(repo_type="auto", ...)`
|
| 234 |
-
- Search/discovery/list/top repos → `hf_repo_search(...)`
|
| 235 |
-
- True trending requests → `hf_trending(...)`
|
| 236 |
-
- Daily papers → `hf_daily_papers(...)`
|
| 237 |
-
- Repo discussions → `hf_repo_discussions(...)`
|
| 238 |
-
- Specific discussion details / latest comment text → `hf_repo_discussion_details(...)`
|
| 239 |
-
- Users who liked a specific repo → `hf_repo_likers(...)`
|
| 240 |
-
|
| 241 |
-
### User questions
|
| 242 |
-
- Profile / overview / "tell me about user X" → `hf_profile_summary(...)`
|
| 243 |
-
- Follower/following **counts** for a user → prefer `hf_profile_summary(...)`
|
| 244 |
-
- Followers / following **lists**, graph samples, and social joins → `hf_user_graph(...)`
|
| 245 |
-
- Repos a user liked → `hf_user_likes(...)`
|
| 246 |
-
- Recent actions / activity feed → `hf_recent_activity(feed_type="user", entity=...)`
|
| 247 |
-
|
| 248 |
-
### Organization questions
|
| 249 |
-
- Organization details and counts → `hf_profile_summary(...)`
|
| 250 |
-
- Organization members → `hf_org_members(...)`
|
| 251 |
-
- Organization repos → `hf_repo_search(author="<org>", repo_types=[...])`
|
| 252 |
-
- Organization or user collections → `hf_collections_search(owner="<org-or-user>", ...)`
|
| 253 |
-
- Repos inside a known collection → `hf_collection_items(collection_id=...)`
|
| 254 |
-
|
| 255 |
-
### Direction reminders
|
| 256 |
-
- `hf_user_likes(...)` = **user → repos**
|
| 257 |
-
- `hf_repo_likers(...)` = **repo → users**
|
| 258 |
-
- `hf_user_graph(...)` = **user/org → followers/following**
|
| 259 |
-
- `"who follows X"` → `hf_user_graph(username="X", relation="followers", ...)`
|
| 260 |
-
- `"who does X follow"` → `hf_user_graph(username="X", relation="following", ...)`
|
| 261 |
-
- If the author/org is already known, start with `hf_repo_search(author=...)` instead of semantic search.
|
| 262 |
-
- For "most popular repo a user liked", use `hf_user_likes(sort="repo_likes" | "repo_downloads", ranking_window=40)` instead of fetching recent likes and re-ranking locally.
|
| 263 |
-
|
| 264 |
-
### Join / intersection guidance
|
| 265 |
-
- For set-intersection questions, prefer **one helper call per side + local set logic**.
|
| 266 |
-
- Example: `"who in the huggingface org follows evalstate"` should use:
|
| 267 |
-
1. `hf_org_members(organization="huggingface", ...)`
|
| 268 |
-
2. `hf_user_graph(username="evalstate", relation="followers", ...)`
|
| 269 |
-
3. intersect `username` locally
|
| 270 |
-
- Example: `"who in the huggingface org does evalstate follow"` should use:
|
| 271 |
-
1. `hf_org_members(organization="huggingface", ...)`
|
| 272 |
-
2. `hf_user_graph(username="evalstate", relation="following", ...)`
|
| 273 |
-
3. intersect `username` locally
|
| 274 |
-
- Do **not** invert follower/following direction when restating the prompt.
|
| 275 |
-
- Do **not** do one graph call per org member for these intersection questions unless you explicitly need a bounded fallback.
|
| 276 |
-
|
| 277 |
-
## Canonical row keys
|
| 278 |
-
Use canonical names in generated code and `fields=[...]`.
|
| 279 |
-
|
| 280 |
-
- Repo rows: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`, `trending_score`, `repo_url`, `tags`
|
| 281 |
-
- Daily paper rows: `paper_id`, `title`, `published_at`, `authors`, `organization`, `repo_id`, `rank`
|
| 282 |
-
- User likes rows: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 283 |
-
- User graph/member rows: `username`, `fullname`, `isPro`, `role`, `type`
|
| 284 |
-
- Activity rows: `event_type`, `repo_id`, `repo_type`, `timestamp`
|
| 285 |
-
- Collection rows: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `last_updated`, `item_count`
|
| 286 |
-
- `hf_profile_summary(...)["item"]`: `handle`, `entity_type`, `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `is_pro`, `likes_sample`, `activity_sample`
|
| 287 |
-
|
| 288 |
-
## High-signal usage notes
|
| 289 |
-
- `hf_repo_search(...)` defaults to models. If the user asks for all repos by an author/org, search across `repo_types=["model", "dataset", "space"]`.
|
| 290 |
-
- Summary helpers come first. Use `hf_repo_details(...)` only when the user explicitly needs exact repo metadata.
|
| 291 |
-
- Use `repo_id` as the display label for repos.
|
| 292 |
-
- `hf_repo_search(...)` model rows may already include `num_params`; use that before considering detail hydration.
|
| 293 |
-
- `hf_trending(...)` returns ordered rows with `trending_rank`. Never fabricate `trending_score`.
|
| 294 |
-
- `hf_daily_papers(...)` may omit `repo_id`. Omit unavailable optional fields instead of forcing nulls.
|
| 295 |
-
- Use `hf_profile_summary(...)["item"]` for aggregate counts such as followers, following, models, datasets, and spaces.
|
| 296 |
-
- Use `hf_whoami()` when you need the explicit current username for joins, comparisons, or labeling.
|
| 297 |
-
- For joins, overlap, and ranking, fetch a large enough working set first and compute locally. It is fine for the internal working set to be larger than the final returned output.
|
| 298 |
-
- Avoid per-row hydration unless exact metadata is required and missing from the current helper response.
|
| 299 |
-
- For fan-out tasks, prefer bounded seed sets by default. If the user explicitly asks for exhaustive coverage, do **not** silently cap at a small sample.
|
| 300 |
-
- If exhaustive coverage is not feasible within the call/time budget, return an explicit partial result with `results` and `coverage`. Never present a bounded sample as complete.
|
| 301 |
-
- In raw mode, do **not** create your own top-level `meta`; runtime already owns the outer `meta`.
|
| 302 |
-
- Use `hf_collections_search(...)` to find collections and `hf_collection_items(...)` to list the repos inside a collection.
|
| 303 |
-
|
| 304 |
-
## Minimal patterns
|
| 305 |
-
```py
|
| 306 |
-
# Exact repo details
|
| 307 |
-
info = await hf_repo_details(
|
| 308 |
-
repo_id="black-forest-labs/FLUX.1-dev",
|
| 309 |
-
repo_type="auto",
|
| 310 |
-
fields=["repo_id", "repo_type", "author", "pipeline_tag", "library_name", "num_params", "likes", "downloads", "repo_url"],
|
| 311 |
-
)
|
| 312 |
-
item = info["item"] or (info["items"][0] if info["items"] else None)
|
| 313 |
-
return {
|
| 314 |
-
"repo_id": item["repo_id"],
|
| 315 |
-
"repo_type": item["repo_type"],
|
| 316 |
-
"author": item["author"],
|
| 317 |
-
"pipeline_tag": item.get("pipeline_tag"),
|
| 318 |
-
"library_name": item.get("library_name"),
|
| 319 |
-
"num_params": item.get("num_params"),
|
| 320 |
-
"likes": item.get("likes"),
|
| 321 |
-
"downloads": item.get("downloads"),
|
| 322 |
-
"repo_url": item.get("repo_url"),
|
| 323 |
-
}
|
| 324 |
|
| 325 |
-
|
| 326 |
-
caps = await hf_runtime_capabilities(section="fields")
|
| 327 |
-
if not caps["ok"]:
|
| 328 |
-
return caps
|
| 329 |
-
item = caps["item"] or (caps["items"][0] if caps["items"] else None)
|
| 330 |
-
return item["content"]
|
| 331 |
-
|
| 332 |
-
# Top trending models with selected fields
|
| 333 |
-
resp = await hf_trending(
|
| 334 |
-
repo_type="model",
|
| 335 |
-
limit=5,
|
| 336 |
-
fields=["repo_id", "likes", "downloads"],
|
| 337 |
-
)
|
| 338 |
-
if not resp["ok"]:
|
| 339 |
-
return resp
|
| 340 |
-
result = []
|
| 341 |
-
for item in resp["items"]:
|
| 342 |
-
row = {}
|
| 343 |
-
for key in ["repo_id", "likes", "downloads"]:
|
| 344 |
-
if item.get(key) is not None:
|
| 345 |
-
row[key] = item[key]
|
| 346 |
-
if row:
|
| 347 |
-
result.append(row)
|
| 348 |
-
return result
|
| 349 |
-
|
| 350 |
-
# Compact profile summary
|
| 351 |
-
summary = await hf_profile_summary(
|
| 352 |
-
handle="mishig",
|
| 353 |
-
include=["likes", "activity"],
|
| 354 |
-
likes_limit=10,
|
| 355 |
-
activity_limit=10,
|
| 356 |
-
)
|
| 357 |
-
item = summary["item"] or (summary["items"][0] if summary["items"] else None)
|
| 358 |
-
return {
|
| 359 |
-
"followers_count": item["followers_count"],
|
| 360 |
-
"following_count": item.get("following_count"),
|
| 361 |
-
"activity_sample": item.get("activity_sample", []),
|
| 362 |
-
"likes_sample": item.get("likes_sample", []),
|
| 363 |
-
}
|
| 364 |
|
| 365 |
-
|
| 366 |
-
followers = await hf_user_graph(
|
| 367 |
-
relation="followers",
|
| 368 |
-
return_limit=20,
|
| 369 |
-
fields=["username"],
|
| 370 |
-
)
|
| 371 |
-
if not followers["ok"]:
|
| 372 |
-
return followers
|
| 373 |
-
result = {}
|
| 374 |
-
processed = 0
|
| 375 |
-
for row in followers["items"]:
|
| 376 |
-
uname = row.get("username")
|
| 377 |
-
if not uname:
|
| 378 |
-
continue
|
| 379 |
-
likes = await hf_user_likes(
|
| 380 |
-
username=uname,
|
| 381 |
-
repo_types=["model"],
|
| 382 |
-
return_limit=3,
|
| 383 |
-
fields=["repo_id", "repo_author", "liked_at"],
|
| 384 |
-
)
|
| 385 |
-
processed += 1
|
| 386 |
-
rows = []
|
| 387 |
-
for item in likes["items"]:
|
| 388 |
-
liked = {}
|
| 389 |
-
for key in ["repo_id", "repo_author", "liked_at"]:
|
| 390 |
-
if item.get(key) is not None:
|
| 391 |
-
liked[key] = item[key]
|
| 392 |
-
if liked:
|
| 393 |
-
rows.append(liked)
|
| 394 |
-
if rows:
|
| 395 |
-
result[uname] = rows
|
| 396 |
-
return {
|
| 397 |
-
"results": result,
|
| 398 |
-
"coverage": {
|
| 399 |
-
"partial": bool(followers["meta"].get("more_available")),
|
| 400 |
-
"reason": "fanout_budget",
|
| 401 |
-
"seed_relation": "followers",
|
| 402 |
-
"seed_limit": 20,
|
| 403 |
-
"seed_processed": processed,
|
| 404 |
-
"seed_total": followers["meta"].get("total"),
|
| 405 |
-
"seed_more_available": followers["meta"].get("more_available"),
|
| 406 |
-
"per_entity_limit": 3,
|
| 407 |
-
"next_request_hint": "Ask for a smaller subset or a follow-up batch if you want more coverage.",
|
| 408 |
-
},
|
| 409 |
-
}
|
| 410 |
|
| 411 |
-
|
| 412 |
-
likes = await hf_user_likes(
|
| 413 |
-
username="julien-c",
|
| 414 |
-
return_limit=1,
|
| 415 |
-
sort="repo_likes",
|
| 416 |
-
ranking_window=40,
|
| 417 |
-
fields=["repo_id", "repo_type", "repo_author", "repo_likes", "repo_url", "liked_at"],
|
| 418 |
-
)
|
| 419 |
-
item = likes["item"] or (likes["items"][0] if likes["items"] else None)
|
| 420 |
-
if item is None:
|
| 421 |
-
return {"error": "No liked repositories found"}
|
| 422 |
-
repo = {}
|
| 423 |
-
for key in ["repo_id", "repo_type", "repo_author", "repo_likes", "repo_url", "liked_at"]:
|
| 424 |
-
if item.get(key) is not None:
|
| 425 |
-
repo[key] = item[key]
|
| 426 |
-
return {
|
| 427 |
-
"repo": repo,
|
| 428 |
-
"metadata": {
|
| 429 |
-
"sort_applied": likes["meta"].get("sort_applied"),
|
| 430 |
-
"ranking_window": likes["meta"].get("ranking_window"),
|
| 431 |
-
"ranking_complete": likes["meta"].get("ranking_complete"),
|
| 432 |
-
},
|
| 433 |
-
}
|
| 434 |
|
| 435 |
-
|
| 436 |
-
activity = await hf_recent_activity(
|
| 437 |
-
feed_type="user",
|
| 438 |
-
entity="mishig",
|
| 439 |
-
return_limit=15,
|
| 440 |
-
fields=["event_type", "repo_id", "repo_type", "timestamp"],
|
| 441 |
-
)
|
| 442 |
-
result = []
|
| 443 |
-
for row in activity["items"]:
|
| 444 |
-
item = {}
|
| 445 |
-
for key in ["event_type", "repo_id", "repo_type", "timestamp"]:
|
| 446 |
-
if row.get(key) is not None:
|
| 447 |
-
item[key] = row[key]
|
| 448 |
-
if item:
|
| 449 |
-
result.append(item)
|
| 450 |
-
return result
|
| 451 |
-
|
| 452 |
-
# Repo discussions
|
| 453 |
-
rows = await hf_repo_discussions(
|
| 454 |
-
repo_type="model",
|
| 455 |
-
repo_id="Qwen/Qwen3.5-35B-A3B",
|
| 456 |
-
limit=10,
|
| 457 |
-
)
|
| 458 |
-
return [
|
| 459 |
-
{
|
| 460 |
-
"num": row["num"],
|
| 461 |
-
"title": row["title"],
|
| 462 |
-
"author": row["author"],
|
| 463 |
-
"status": row["status"],
|
| 464 |
-
}
|
| 465 |
-
for row in rows["items"]
|
| 466 |
-
]
|
| 467 |
-
|
| 468 |
-
# Collections owned by an org or user
|
| 469 |
-
collections = await hf_collections_search(
|
| 470 |
-
owner="Qwen",
|
| 471 |
-
return_limit=20,
|
| 472 |
-
fields=["collection_id", "title", "owner", "description", "last_updated", "item_count"],
|
| 473 |
-
)
|
| 474 |
-
return collections["items"]
|
| 475 |
|
| 476 |
-
|
| 477 |
-
papers = await hf_daily_papers(
|
| 478 |
-
limit=20,
|
| 479 |
-
fields=["title", "repo_id"],
|
| 480 |
-
)
|
| 481 |
-
return papers["items"]
|
| 482 |
```
|
|
|
|
| 1 |
+
## Monty rules
|
| 2 |
|
| 3 |
+
- You are writing Python for Monty.
|
| 4 |
+
- Do **not** use imports.
|
| 5 |
+
- All helper calls are async: always use `await`.
|
| 6 |
+
- Use this exact outer shape:
|
|
|
|
| 7 |
|
| 8 |
```py
|
| 9 |
async def solve(query, max_calls):
|
| 10 |
...
|
|
|
|
| 11 |
|
| 12 |
await solve(query, max_calls)
|
| 13 |
```
|
| 14 |
|
| 15 |
+
- `max_calls` is the total external-call budget for the whole program.
|
| 16 |
+
- Use only documented `hf_*` helpers.
|
| 17 |
+
- If you are unsure about helper names, fields, defaults, or limits, call `hf_runtime_capabilities(...)`.
|
| 18 |
+
- Return plain Python data only: `dict`, `list`, `str`, `int`, `float`, `bool`, or `None`.
|
| 19 |
+
- Do **not** hand-build JSON strings or markdown strings inside `solve(...)` unless the user explicitly asked for prose.
|
| 20 |
+
- Do **not** build your own transport wrapper like `{result: ..., meta: ...}`.
|
| 21 |
+
- If the user says "return only" some fields, return exactly that final shape.
|
| 22 |
+
- If a helper already returns the requested row shape, return `resp["items"]` directly instead of rebuilding it.
|
| 23 |
+
- For current-user prompts (`my`, `me`), try helpers with `username=None` / `handle=None` first.
|
| 24 |
+
- If a current-user helper returns `ok=false`, return that helper response directly.
|
| 25 |
+
|
| 26 |
+
## Search rules
|
| 27 |
+
|
| 28 |
+
## Parameter notes
|
| 29 |
+
|
| 30 |
+
- List helpers use `limit`.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
- `hf_profile_summary(include=...)` supports only `"likes"` and `"activity"`.
|
| 32 |
+
- `hf_user_likes(sort=...)` supports `liked_at`, `repo_likes`, and `repo_downloads`.
|
| 33 |
+
- When the user asks for helper-owned coverage metadata, use `helper_resp["meta"]`.
|
| 34 |
+
- For pro-only follower/member/liker queries, pass `pro_only=True` rather than filtering on a projected field such as `is_pro`.
|
| 35 |
+
- `hf_profile_summary(...).item` aggregate counts use exact names like `followers_count` and `following_count`.
|
| 36 |
+
- `hf_user_likes(...)` rows use `repo_likes` / `repo_downloads`, not plain `likes` / `downloads`.
|
| 37 |
+
- `hf_user_graph(...)` and `hf_repo_likers(...)` rows use `is_pro`.
|
| 38 |
+
- `hf_repo_discussions(...)` rows use `num`, `title`, `author`, `status`, `created_at`, and `url`.
|
| 39 |
+
- `hf_user_likes(...)` already returns full normalized like rows by default; omit `fields` unless the user asked for a subset.
|
| 40 |
+
- Unknown `fields` / `where` keys now fail fast. Use only canonical field names.
|
| 41 |
+
|
| 42 |
+
- If the user is asking about models, use `hf_models_search(...)`.
|
| 43 |
+
- If the user is asking about datasets, use `hf_datasets_search(...)`.
|
| 44 |
+
- If the user is asking about spaces, use `hf_spaces_search(...)`.
|
| 45 |
+
- Use `hf_repo_search(...)` only for intentionally cross-type search.
|
| 46 |
+
- Ownership phrasing like "what collections does Qwen have", "collections by Qwen", or "collections owned by Qwen" means an owner lookup, so use `hf_collections_search(owner="Qwen")`, not a keyword-only `query="Qwen"` search.
|
| 47 |
+
- Owner/user/org handles may arrive with different casing in the user message; when a handle's spelling is uncertain, prefer owner-oriented logic and, if needed, add a fallback inside `solve(...)` that broadens to `query=...` and filters owners case-insensitively.
|
| 48 |
+
- Think like `huggingface_hub`: `search`, `filter`, `author`, repo-type-specific upstream params, then `fields`.
|
| 49 |
+
- Push constraints upstream whenever a first-class helper argument exists.
|
| 50 |
+
- `post_filter` is only for filtering normalized rows after fetch.
|
| 51 |
+
- Keep `post_filter` simple:
|
| 52 |
+
- exact match or `in` for returned fields like `runtime_stage`
|
| 53 |
+
- `gte` / `lte` only for `downloads` and `likes`
|
| 54 |
+
- Do **not** use `post_filter` for things that already have first-class upstream params like `author`, `pipeline_tag`, `dataset_name`, `language`, `models`, or `datasets`.
|
| 55 |
+
|
| 56 |
+
Examples:
|
| 57 |
+
|
| 58 |
+
```py
|
| 59 |
+
await hf_models_search(pipeline_tag="text-to-image", limit=10)
|
| 60 |
+
await hf_datasets_search(search="speech", sort="downloads", limit=10)
|
| 61 |
+
await hf_spaces_search(post_filter={"runtime_stage": {"in": ["BUILD_ERROR", "RUNTIME_ERROR"]}})
|
| 62 |
+
await hf_models_search(search="gguf", post_filter={"downloads": {"gte": 1000}})
|
| 63 |
+
await hf_collections_search(owner="Qwen", limit=10)
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
Field-only pattern:
|
| 67 |
+
|
| 68 |
+
```py
|
| 69 |
+
resp = await hf_models_search(
|
| 70 |
+
pipeline_tag="text-to-image",
|
| 71 |
+
fields=["repo_id", "author", "likes", "downloads", "repo_url"],
|
| 72 |
+
limit=3,
|
| 73 |
+
)
|
| 74 |
+
return resp["items"]
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
Coverage pattern:
|
| 78 |
+
|
| 79 |
+
```py
|
| 80 |
+
resp = await hf_user_likes(
|
| 81 |
+
username="julien-c",
|
| 82 |
+
sort="repo_likes",
|
| 83 |
+
limit=20,
|
| 84 |
+
fields=["repo_id", "repo_likes", "repo_url"],
|
| 85 |
+
)
|
| 86 |
+
return {"results": resp["items"][:1], "coverage": resp["meta"]}
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
Profile-count pattern:
|
| 90 |
+
|
| 91 |
+
```py
|
| 92 |
+
profile = await hf_profile_summary(handle="mishig")
|
| 93 |
+
item = profile["item"] or {}
|
| 94 |
+
return {
|
| 95 |
+
"followers_count": item.get("followers_count"),
|
| 96 |
+
"following_count": item.get("following_count"),
|
| 97 |
+
}
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
Pro-followers pattern:
|
| 101 |
+
|
| 102 |
+
```py
|
| 103 |
+
followers = await hf_user_graph(
|
| 104 |
+
relation="followers",
|
| 105 |
+
pro_only=True,
|
| 106 |
+
limit=20,
|
| 107 |
+
fields=["username"],
|
| 108 |
+
)
|
| 109 |
+
return followers["items"]
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
## Navigation graph
|
| 113 |
+
|
| 114 |
+
Use the helper that matches the question type.
|
| 115 |
+
|
| 116 |
+
- exact repo details → `hf_repo_details(...)`
|
| 117 |
+
- model search/list/discovery → `hf_models_search(...)`
|
| 118 |
+
- dataset search/list/discovery → `hf_datasets_search(...)`
|
| 119 |
+
- space search/list/discovery → `hf_spaces_search(...)`
|
| 120 |
+
- cross-type repo search → `hf_repo_search(...)`
|
| 121 |
+
- trending repos → `hf_trending(...)`
|
| 122 |
+
- daily papers → `hf_daily_papers(...)`
|
| 123 |
+
- repo discussions → `hf_repo_discussions(...)`
|
| 124 |
+
- specific discussion details → `hf_repo_discussion_details(...)`
|
| 125 |
+
- users who liked one repo → `hf_repo_likers(...)`
|
| 126 |
+
- profile / overview / aggregate counts → `hf_profile_summary(...)`
|
| 127 |
+
- followers / following lists → `hf_user_graph(...)`
|
| 128 |
+
- repos a user liked → `hf_user_likes(...)`
|
| 129 |
+
- recent activity feed → `hf_recent_activity(...)`
|
| 130 |
+
- organization members → `hf_org_members(...)`
|
| 131 |
+
- collections search → `hf_collections_search(...)`
|
| 132 |
+
- items inside a known collection → `hf_collection_items(...)`
|
| 133 |
+
- explicit current username → `hf_whoami()`
|
| 134 |
+
|
| 135 |
+
Direction reminders:
|
| 136 |
+
- `hf_user_likes(...)` = user → repos
|
| 137 |
+
- `hf_repo_likers(...)` = repo → users
|
| 138 |
+
- `hf_user_graph(...)` = user/org → followers/following
|
| 139 |
|
| 140 |
## Helper result shape
|
| 141 |
+
|
| 142 |
All helpers return:
|
| 143 |
+
|
| 144 |
```py
|
| 145 |
{
|
| 146 |
"ok": bool,
|
|
|
|
| 153 |
|
| 154 |
Rules:
|
| 155 |
- `items` is the canonical list field.
|
| 156 |
+
- `item` is just a singleton convenience.
|
| 157 |
+
- `meta` contains helper-owned execution, limit, and coverage info.
|
| 158 |
+
- When helper-owned coverage matters, prefer returning the helper envelope directly.
|
| 159 |
+
|
| 160 |
+
## High-signal output rules
|
| 161 |
+
|
| 162 |
+
- Prefer compact dict/list outputs over prose when the user asked for fields.
|
| 163 |
+
- Prefer summary helpers before detail hydration.
|
| 164 |
+
- Use canonical snake_case keys in generated code and structured output.
|
| 165 |
+
- Use `repo_id` as the display label for repos.
|
| 166 |
+
- Use `hf_profile_summary(...)['item']` for aggregate counts such as followers, following, models, datasets, and spaces.
|
| 167 |
+
- For joins/intersections/rankings, fetch the needed working set first and compute locally.
|
| 168 |
+
- If the result is partial, use top-level keys `results` and `coverage`.
|
| 169 |
+
|
| 170 |
+
## Helper signatures (generated from Python)
|
| 171 |
+
|
| 172 |
+
These signatures are exported from the live runtime with `inspect.signature(...)`.
|
| 173 |
+
If prompt prose and signatures disagree, trust these signatures.
|
| 174 |
|
|
|
|
| 175 |
```py
|
| 176 |
+
await hf_collection_items(collection_id: 'str', repo_types: 'list[str] | None' = None, limit: 'int' = 100, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 177 |
|
| 178 |
+
await hf_collections_search(query: 'str | None' = None, owner: 'str | None' = None, limit: 'int' = 20, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
+
await hf_daily_papers(limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
+
await hf_datasets_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, benchmark: 'str | bool | None' = None, dataset_name: 'str | None' = None, gated: 'bool | None' = None, language_creators: 'str | list[str] | None' = None, language: 'str | list[str] | None' = None, multilinguality: 'str | list[str] | None' = None, size_categories: 'str | list[str] | None' = None, task_categories: 'str | list[str] | None' = None, task_ids: 'str | list[str] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
+
await hf_models_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, apps: 'str | list[str] | None' = None, gated: 'bool | None' = None, inference: 'str | None' = None, inference_provider: 'str | list[str] | None' = None, model_name: 'str | None' = None, trained_dataset: 'str | list[str] | None' = None, pipeline_tag: 'str | None' = None, emissions_thresholds: 'tuple[float, float] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, card_data: 'bool' = False, fetch_config: 'bool' = False, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
+
await hf_org_members(organization: 'str', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
+
await hf_profile_summary(handle: 'str | None' = None, include: 'list[str] | None' = None, likes_limit: 'int' = 10, activity_limit: 'int' = 10) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
+
await hf_recent_activity(feed_type: 'str | None' = None, entity: 'str | None' = None, activity_types: 'list[str] | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, max_pages: 'int | None' = None, start_cursor: 'str | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
+
await hf_repo_details(repo_id: 'str | None' = None, repo_ids: 'list[str] | None' = None, repo_type: 'str' = 'auto', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
+
await hf_repo_discussion_details(repo_type: 'str', repo_id: 'str', discussion_num: 'int', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
|
|
|
| 195 |
|
| 196 |
+
await hf_repo_discussions(repo_type: 'str', repo_id: 'str', limit: 'int' = 20, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
+
await hf_repo_likers(repo_id: 'str', repo_type: 'str', limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
+
await hf_repo_search(search: 'str | None' = None, repo_type: 'str | None' = None, repo_types: 'list[str] | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, sort: 'str | None' = None, limit: 'int' = 20, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
|
|
|
| 201 |
|
| 202 |
+
await hf_runtime_capabilities(section: 'str | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
+
await hf_spaces_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, datasets: 'str | list[str] | None' = None, models: 'str | list[str] | None' = None, linked: 'bool' = False, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
+
await hf_trending(repo_type: 'str' = 'model', limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
| 208 |
+
await hf_user_graph(username: 'str | None' = None, relation: 'str' = 'followers', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
+
await hf_user_likes(username: 'str | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None, sort: 'str | None' = None, ranking_window: 'int | None' = None) -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
+
await hf_whoami() -> 'dict[str, Any]'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
```
|
hf-hub-query.md
CHANGED
|
@@ -4,7 +4,7 @@ name: hf_hub_query
|
|
| 4 |
model: hf.openai/gpt-oss-120b:sambanova
|
| 5 |
use_history: false
|
| 6 |
default: true
|
| 7 |
-
description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound
|
| 8 |
shell: false
|
| 9 |
skills: []
|
| 10 |
function_tools:
|
|
|
|
| 4 |
model: hf.openai/gpt-oss-120b:sambanova
|
| 5 |
use_history: false
|
| 6 |
default: true
|
| 7 |
+
description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound limit, scan_limit, and max_pages for brevity or broader coverage, and the tool can also be asked about its supported helpers, canonical fields, defaults, and coverage behavior."
|
| 8 |
shell: false
|
| 9 |
skills: []
|
| 10 |
function_tools:
|
monty_api/__pycache__/aliases.cpython-313.pyc
CHANGED
|
Binary files a/monty_api/__pycache__/aliases.cpython-313.pyc and b/monty_api/__pycache__/aliases.cpython-313.pyc differ
|
|
|
monty_api/__pycache__/constants.cpython-313.pyc
CHANGED
|
Binary files a/monty_api/__pycache__/constants.cpython-313.pyc and b/monty_api/__pycache__/constants.cpython-313.pyc differ
|
|
|
monty_api/__pycache__/http_runtime.cpython-313.pyc
CHANGED
|
Binary files a/monty_api/__pycache__/http_runtime.cpython-313.pyc and b/monty_api/__pycache__/http_runtime.cpython-313.pyc differ
|
|
|
monty_api/__pycache__/registry.cpython-313.pyc
CHANGED
|
Binary files a/monty_api/__pycache__/registry.cpython-313.pyc and b/monty_api/__pycache__/registry.cpython-313.pyc differ
|
|
|
monty_api/__pycache__/runtime_context.cpython-313.pyc
CHANGED
|
Binary files a/monty_api/__pycache__/runtime_context.cpython-313.pyc and b/monty_api/__pycache__/runtime_context.cpython-313.pyc differ
|
|
|
monty_api/__pycache__/runtime_envelopes.cpython-313.pyc
CHANGED
|
Binary files a/monty_api/__pycache__/runtime_envelopes.cpython-313.pyc and b/monty_api/__pycache__/runtime_envelopes.cpython-313.pyc differ
|
|
|
monty_api/__pycache__/runtime_filtering.cpython-313.pyc
CHANGED
|
Binary files a/monty_api/__pycache__/runtime_filtering.cpython-313.pyc and b/monty_api/__pycache__/runtime_filtering.cpython-313.pyc differ
|
|
|
monty_api/__pycache__/validation.cpython-313.pyc
CHANGED
|
Binary files a/monty_api/__pycache__/validation.cpython-313.pyc and b/monty_api/__pycache__/validation.cpython-313.pyc differ
|
|
|
monty_api/aliases.py
CHANGED
|
@@ -29,93 +29,3 @@ REPO_SORT_KEYS: dict[str, set[str]] = {
|
|
| 29 |
"trending_score",
|
| 30 |
},
|
| 31 |
}
|
| 32 |
-
|
| 33 |
-
# Alias policy:
|
| 34 |
-
# - canonical names stay canonical
|
| 35 |
-
# - support a small compatibility set for observed prompt/output variants
|
| 36 |
-
# - do not add speculative synonyms unless they appear in prompts, evals, or
|
| 37 |
-
# upstream payloads we already normalize
|
| 38 |
-
SORT_KEY_ALIASES: dict[str, str] = {
|
| 39 |
-
"createdat": "created_at",
|
| 40 |
-
"created_at": "created_at",
|
| 41 |
-
"created-at": "created_at",
|
| 42 |
-
"downloads": "downloads",
|
| 43 |
-
"likes": "likes",
|
| 44 |
-
"lastmodified": "last_modified",
|
| 45 |
-
"last_modified": "last_modified",
|
| 46 |
-
"last-modified": "last_modified",
|
| 47 |
-
"trendingscore": "trending_score",
|
| 48 |
-
"trending_score": "trending_score",
|
| 49 |
-
"trending-score": "trending_score",
|
| 50 |
-
"trending": "trending_score",
|
| 51 |
-
}
|
| 52 |
-
|
| 53 |
-
USER_FIELD_ALIASES: dict[str, str] = {
|
| 54 |
-
"login": "username",
|
| 55 |
-
"user": "username",
|
| 56 |
-
"handle": "username",
|
| 57 |
-
"name": "fullname",
|
| 58 |
-
"full_name": "fullname",
|
| 59 |
-
"is_pro": "isPro",
|
| 60 |
-
"pro": "isPro",
|
| 61 |
-
}
|
| 62 |
-
|
| 63 |
-
ACTOR_FIELD_ALIASES: dict[str, str] = {
|
| 64 |
-
**USER_FIELD_ALIASES,
|
| 65 |
-
"entity_type": "type",
|
| 66 |
-
"user_type": "type",
|
| 67 |
-
}
|
| 68 |
-
|
| 69 |
-
REPO_FIELD_ALIASES: dict[str, str] = {
|
| 70 |
-
"repoid": "repo_id",
|
| 71 |
-
"repotype": "repo_type",
|
| 72 |
-
"repourl": "repo_url",
|
| 73 |
-
"createdat": "created_at",
|
| 74 |
-
"lastmodified": "last_modified",
|
| 75 |
-
"pipelinetag": "pipeline_tag",
|
| 76 |
-
"numparams": "num_params",
|
| 77 |
-
"trendingrank": "trending_rank",
|
| 78 |
-
"trendingscore": "trending_score",
|
| 79 |
-
"libraryname": "library_name",
|
| 80 |
-
"paperswithcodeid": "paperswithcode_id",
|
| 81 |
-
"runtimestage": "runtime_stage",
|
| 82 |
-
"runtimestatus": "runtime_stage",
|
| 83 |
-
}
|
| 84 |
-
|
| 85 |
-
COLLECTION_FIELD_ALIASES: dict[str, str] = {
|
| 86 |
-
"collectionid": "collection_id",
|
| 87 |
-
"lastupdated": "last_updated",
|
| 88 |
-
"ownertype": "owner_type",
|
| 89 |
-
"itemcount": "item_count",
|
| 90 |
-
"author": "owner",
|
| 91 |
-
}
|
| 92 |
-
|
| 93 |
-
DAILY_PAPER_FIELD_ALIASES: dict[str, str] = {
|
| 94 |
-
"paperid": "paper_id",
|
| 95 |
-
"publishedat": "published_at",
|
| 96 |
-
"submittedondailyat": "submitted_on_daily_at",
|
| 97 |
-
"submittedby": "submitted_by",
|
| 98 |
-
"discussionid": "discussion_id",
|
| 99 |
-
"githubrepo": "github_repo_url",
|
| 100 |
-
"githubstars": "github_stars",
|
| 101 |
-
"projectpage": "project_page_url",
|
| 102 |
-
"numcomments": "num_comments",
|
| 103 |
-
"isauthorparticipating": "is_author_participating",
|
| 104 |
-
"repoid": "repo_id",
|
| 105 |
-
}
|
| 106 |
-
|
| 107 |
-
USER_LIKES_FIELD_ALIASES: dict[str, str] = {
|
| 108 |
-
"likedat": "liked_at",
|
| 109 |
-
"repoid": "repo_id",
|
| 110 |
-
"repotype": "repo_type",
|
| 111 |
-
"repoauthor": "repo_author",
|
| 112 |
-
"repolikes": "repo_likes",
|
| 113 |
-
"repodownloads": "repo_downloads",
|
| 114 |
-
}
|
| 115 |
-
|
| 116 |
-
ACTIVITY_FIELD_ALIASES: dict[str, str] = {
|
| 117 |
-
"time": "timestamp",
|
| 118 |
-
"type": "event_type",
|
| 119 |
-
"repoid": "repo_id",
|
| 120 |
-
"repotype": "repo_type",
|
| 121 |
-
}
|
|
|
|
| 29 |
"trending_score",
|
| 30 |
},
|
| 31 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
monty_api/constants.py
CHANGED
|
@@ -79,7 +79,7 @@ USER_CANONICAL_FIELDS: tuple[str, ...] = (
|
|
| 79 |
"username",
|
| 80 |
"fullname",
|
| 81 |
"bio",
|
| 82 |
-
"
|
| 83 |
"twitter",
|
| 84 |
"github",
|
| 85 |
"linkedin",
|
|
@@ -87,7 +87,7 @@ USER_CANONICAL_FIELDS: tuple[str, ...] = (
|
|
| 87 |
"followers",
|
| 88 |
"following",
|
| 89 |
"likes",
|
| 90 |
-
"
|
| 91 |
)
|
| 92 |
|
| 93 |
PROFILE_CANONICAL_FIELDS: tuple[str, ...] = (
|
|
@@ -121,11 +121,48 @@ PROFILE_CANONICAL_FIELDS: tuple[str, ...] = (
|
|
| 121 |
ACTOR_CANONICAL_FIELDS: tuple[str, ...] = (
|
| 122 |
"username",
|
| 123 |
"fullname",
|
| 124 |
-
"
|
| 125 |
"role",
|
| 126 |
"type",
|
| 127 |
)
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
ACTIVITY_CANONICAL_FIELDS: tuple[str, ...] = (
|
| 130 |
"event_type",
|
| 131 |
"repo_id",
|
|
|
|
| 79 |
"username",
|
| 80 |
"fullname",
|
| 81 |
"bio",
|
| 82 |
+
"website_url",
|
| 83 |
"twitter",
|
| 84 |
"github",
|
| 85 |
"linkedin",
|
|
|
|
| 87 |
"followers",
|
| 88 |
"following",
|
| 89 |
"likes",
|
| 90 |
+
"is_pro",
|
| 91 |
)
|
| 92 |
|
| 93 |
PROFILE_CANONICAL_FIELDS: tuple[str, ...] = (
|
|
|
|
| 121 |
ACTOR_CANONICAL_FIELDS: tuple[str, ...] = (
|
| 122 |
"username",
|
| 123 |
"fullname",
|
| 124 |
+
"is_pro",
|
| 125 |
"role",
|
| 126 |
"type",
|
| 127 |
)
|
| 128 |
|
| 129 |
+
USER_LIKES_CANONICAL_FIELDS: tuple[str, ...] = (
|
| 130 |
+
"liked_at",
|
| 131 |
+
"repo_id",
|
| 132 |
+
"repo_type",
|
| 133 |
+
"repo_author",
|
| 134 |
+
"repo_likes",
|
| 135 |
+
"repo_downloads",
|
| 136 |
+
"repo_url",
|
| 137 |
+
)
|
| 138 |
+
|
| 139 |
+
DISCUSSION_CANONICAL_FIELDS: tuple[str, ...] = (
|
| 140 |
+
"num",
|
| 141 |
+
"repo_id",
|
| 142 |
+
"repo_type",
|
| 143 |
+
"title",
|
| 144 |
+
"author",
|
| 145 |
+
"created_at",
|
| 146 |
+
"status",
|
| 147 |
+
"url",
|
| 148 |
+
)
|
| 149 |
+
|
| 150 |
+
DISCUSSION_DETAIL_CANONICAL_FIELDS: tuple[str, ...] = (
|
| 151 |
+
"num",
|
| 152 |
+
"repo_id",
|
| 153 |
+
"repo_type",
|
| 154 |
+
"title",
|
| 155 |
+
"author",
|
| 156 |
+
"created_at",
|
| 157 |
+
"status",
|
| 158 |
+
"url",
|
| 159 |
+
"comment_count",
|
| 160 |
+
"latest_comment_author",
|
| 161 |
+
"latest_comment_created_at",
|
| 162 |
+
"latest_comment_text",
|
| 163 |
+
"latest_comment_html",
|
| 164 |
+
)
|
| 165 |
+
|
| 166 |
ACTIVITY_CANONICAL_FIELDS: tuple[str, ...] = (
|
| 167 |
"event_type",
|
| 168 |
"repo_id",
|
monty_api/helpers/__pycache__/activity.cpython-313.pyc
CHANGED
|
Binary files a/monty_api/helpers/__pycache__/activity.cpython-313.pyc and b/monty_api/helpers/__pycache__/activity.cpython-313.pyc differ
|
|
|
monty_api/helpers/__pycache__/collections.cpython-313.pyc
CHANGED
|
Binary files a/monty_api/helpers/__pycache__/collections.cpython-313.pyc and b/monty_api/helpers/__pycache__/collections.cpython-313.pyc differ
|
|
|
monty_api/helpers/__pycache__/introspection.cpython-313.pyc
CHANGED
|
Binary files a/monty_api/helpers/__pycache__/introspection.cpython-313.pyc and b/monty_api/helpers/__pycache__/introspection.cpython-313.pyc differ
|
|
|
monty_api/helpers/__pycache__/profiles.cpython-313.pyc
CHANGED
|
Binary files a/monty_api/helpers/__pycache__/profiles.cpython-313.pyc and b/monty_api/helpers/__pycache__/profiles.cpython-313.pyc differ
|
|
|
monty_api/helpers/__pycache__/repos.cpython-313.pyc
CHANGED
|
Binary files a/monty_api/helpers/__pycache__/repos.cpython-313.pyc and b/monty_api/helpers/__pycache__/repos.cpython-313.pyc differ
|
|
|
monty_api/helpers/activity.py
CHANGED
|
@@ -4,8 +4,8 @@ from __future__ import annotations
|
|
| 4 |
from functools import partial
|
| 5 |
from typing import Any, Callable
|
| 6 |
|
| 7 |
-
from ..aliases import ACTIVITY_FIELD_ALIASES
|
| 8 |
from ..constants import (
|
|
|
|
| 9 |
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 10 |
RECENT_ACTIVITY_PAGE_SIZE,
|
| 11 |
RECENT_ACTIVITY_SCAN_MAX_PAGES,
|
|
@@ -19,7 +19,7 @@ async def hf_recent_activity(
|
|
| 19 |
entity: str | None = None,
|
| 20 |
activity_types: list[str] | None = None,
|
| 21 |
repo_types: list[str] | None = None,
|
| 22 |
-
|
| 23 |
max_pages: int | None = None,
|
| 24 |
start_cursor: str | None = None,
|
| 25 |
count_only: bool = False,
|
|
@@ -27,7 +27,7 @@ async def hf_recent_activity(
|
|
| 27 |
fields: list[str] | None = None,
|
| 28 |
) -> dict[str, Any]:
|
| 29 |
start_calls = ctx.call_count["n"]
|
| 30 |
-
|
| 31 |
page_cap = ctx._policy_int(
|
| 32 |
"hf_recent_activity", "page_limit", RECENT_ACTIVITY_PAGE_SIZE
|
| 33 |
)
|
|
@@ -56,12 +56,12 @@ async def hf_recent_activity(
|
|
| 56 |
error="entity is required",
|
| 57 |
)
|
| 58 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 59 |
-
|
| 60 |
count_only=count_only,
|
| 61 |
-
|
| 62 |
-
|
| 63 |
)
|
| 64 |
-
|
| 65 |
page_lim = page_cap
|
| 66 |
pages_lim = ctx._clamp_int(
|
| 67 |
requested_max_pages, default=pages_cap, minimum=1, maximum=pages_cap
|
|
@@ -85,8 +85,17 @@ async def hf_recent_activity(
|
|
| 85 |
pages = 0
|
| 86 |
exhausted_feed = False
|
| 87 |
stopped_for_budget = False
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
if ctx._budget_remaining() <= 0:
|
| 91 |
stopped_for_budget = True
|
| 92 |
break
|
|
@@ -147,15 +156,22 @@ async def hf_recent_activity(
|
|
| 147 |
if not ctx._item_matches_where(item, normalized_where):
|
| 148 |
continue
|
| 149 |
matched += 1
|
| 150 |
-
if len(items) <
|
| 151 |
items.append(item)
|
| 152 |
if not next_cursor:
|
| 153 |
exhausted_feed = True
|
| 154 |
break
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
exact_count = exhausted_feed and (not stopped_for_budget)
|
| 157 |
sample_complete = (
|
| 158 |
-
exact_count and
|
| 159 |
)
|
| 160 |
page_limit_hit = (
|
| 161 |
next_cursor is not None and pages >= pages_lim and (not exhausted_feed)
|
|
|
|
| 4 |
from functools import partial
|
| 5 |
from typing import Any, Callable
|
| 6 |
|
|
|
|
| 7 |
from ..constants import (
|
| 8 |
+
ACTIVITY_CANONICAL_FIELDS,
|
| 9 |
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 10 |
RECENT_ACTIVITY_PAGE_SIZE,
|
| 11 |
RECENT_ACTIVITY_SCAN_MAX_PAGES,
|
|
|
|
| 19 |
entity: str | None = None,
|
| 20 |
activity_types: list[str] | None = None,
|
| 21 |
repo_types: list[str] | None = None,
|
| 22 |
+
limit: int | None = None,
|
| 23 |
max_pages: int | None = None,
|
| 24 |
start_cursor: str | None = None,
|
| 25 |
count_only: bool = False,
|
|
|
|
| 27 |
fields: list[str] | None = None,
|
| 28 |
) -> dict[str, Any]:
|
| 29 |
start_calls = ctx.call_count["n"]
|
| 30 |
+
default_limit = ctx._policy_int("hf_recent_activity", "default_limit", 100)
|
| 31 |
page_cap = ctx._policy_int(
|
| 32 |
"hf_recent_activity", "page_limit", RECENT_ACTIVITY_PAGE_SIZE
|
| 33 |
)
|
|
|
|
| 56 |
error="entity is required",
|
| 57 |
)
|
| 58 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 59 |
+
limit=limit,
|
| 60 |
count_only=count_only,
|
| 61 |
+
default_limit=default_limit,
|
| 62 |
+
max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 63 |
)
|
| 64 |
+
applied_limit = int(limit_plan["applied_limit"])
|
| 65 |
page_lim = page_cap
|
| 66 |
pages_lim = ctx._clamp_int(
|
| 67 |
requested_max_pages, default=pages_cap, minimum=1, maximum=pages_cap
|
|
|
|
| 85 |
pages = 0
|
| 86 |
exhausted_feed = False
|
| 87 |
stopped_for_budget = False
|
| 88 |
+
try:
|
| 89 |
+
normalized_where = ctx._normalize_where(
|
| 90 |
+
where, allowed_fields=ACTIVITY_CANONICAL_FIELDS
|
| 91 |
+
)
|
| 92 |
+
except ValueError as exc:
|
| 93 |
+
return ctx._helper_error(
|
| 94 |
+
start_calls=start_calls,
|
| 95 |
+
source="/api/recent-activity",
|
| 96 |
+
error=exc,
|
| 97 |
+
)
|
| 98 |
+
while pages < pages_lim and (applied_limit == 0 or len(items) < applied_limit):
|
| 99 |
if ctx._budget_remaining() <= 0:
|
| 100 |
stopped_for_budget = True
|
| 101 |
break
|
|
|
|
| 156 |
if not ctx._item_matches_where(item, normalized_where):
|
| 157 |
continue
|
| 158 |
matched += 1
|
| 159 |
+
if len(items) < applied_limit:
|
| 160 |
items.append(item)
|
| 161 |
if not next_cursor:
|
| 162 |
exhausted_feed = True
|
| 163 |
break
|
| 164 |
+
try:
|
| 165 |
+
items = ctx._project_activity_items(items, fields)
|
| 166 |
+
except ValueError as exc:
|
| 167 |
+
return ctx._helper_error(
|
| 168 |
+
start_calls=start_calls,
|
| 169 |
+
source="/api/recent-activity",
|
| 170 |
+
error=exc,
|
| 171 |
+
)
|
| 172 |
exact_count = exhausted_feed and (not stopped_for_budget)
|
| 173 |
sample_complete = (
|
| 174 |
+
exact_count and applied_limit >= matched and (not count_only or matched == 0)
|
| 175 |
)
|
| 176 |
page_limit_hit = (
|
| 177 |
next_cursor is not None and pages >= pages_lim and (not exhausted_feed)
|
monty_api/helpers/collections.py
CHANGED
|
@@ -4,8 +4,11 @@ from __future__ import annotations
|
|
| 4 |
from functools import partial
|
| 5 |
from typing import Any, Callable
|
| 6 |
|
| 7 |
-
from ..
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
| 9 |
from ..context_types import HelperRuntimeContext
|
| 10 |
|
| 11 |
|
|
@@ -13,25 +16,29 @@ async def hf_collections_search(
|
|
| 13 |
ctx: HelperRuntimeContext,
|
| 14 |
query: str | None = None,
|
| 15 |
owner: str | None = None,
|
| 16 |
-
|
| 17 |
count_only: bool = False,
|
| 18 |
where: dict[str, Any] | None = None,
|
| 19 |
fields: list[str] | None = None,
|
| 20 |
) -> dict[str, Any]:
|
| 21 |
start_calls = ctx.call_count["n"]
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
"hf_collections_search", "
|
| 25 |
)
|
| 26 |
if count_only:
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
| 30 |
)
|
| 31 |
owner_clean = str(owner or "").strip() or None
|
| 32 |
-
|
|
|
|
| 33 |
if owner_clean:
|
| 34 |
-
|
| 35 |
term = str(query or "").strip()
|
| 36 |
if not term and owner_clean:
|
| 37 |
term = owner_clean
|
|
@@ -41,7 +48,7 @@ async def hf_collections_search(
|
|
| 41 |
source="/api/collections",
|
| 42 |
error="query or owner is required",
|
| 43 |
)
|
| 44 |
-
params: dict[str, Any] = {"limit":
|
| 45 |
if term:
|
| 46 |
params["q"] = term
|
| 47 |
if owner_clean:
|
|
@@ -54,8 +61,43 @@ async def hf_collections_search(
|
|
| 54 |
error=resp.get("error") or "collections fetch failed",
|
| 55 |
)
|
| 56 |
payload = resp.get("data") if isinstance(resp.get("data"), list) else []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
items: list[dict[str, Any]] = []
|
| 58 |
-
for row in payload[:
|
| 59 |
if not isinstance(row, dict):
|
| 60 |
continue
|
| 61 |
row_owner = ctx._author_from_any(row.get("owner")) or ctx._author_from_any(
|
|
@@ -67,7 +109,9 @@ async def hf_collections_search(
|
|
| 67 |
and "/" in str(row.get("slug"))
|
| 68 |
):
|
| 69 |
row_owner = str(row.get("slug")).split("/", 1)[0]
|
| 70 |
-
if
|
|
|
|
|
|
|
| 71 |
continue
|
| 72 |
owner_payload = row.get("owner") if isinstance(row.get("owner"), dict) else {}
|
| 73 |
collection_items = (
|
|
@@ -89,12 +133,29 @@ async def hf_collections_search(
|
|
| 89 |
"item_count": len(collection_items),
|
| 90 |
}
|
| 91 |
)
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
total_matched = len(items)
|
| 94 |
-
items = items[:
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
truncated = (
|
| 97 |
-
|
|
|
|
| 98 |
)
|
| 99 |
return ctx._helper_success(
|
| 100 |
start_calls=start_calls,
|
|
@@ -110,6 +171,7 @@ async def hf_collections_search(
|
|
| 110 |
complete=not truncated,
|
| 111 |
query=term,
|
| 112 |
owner=owner_clean,
|
|
|
|
| 113 |
)
|
| 114 |
|
| 115 |
|
|
@@ -117,15 +179,15 @@ async def hf_collection_items(
|
|
| 117 |
ctx: HelperRuntimeContext,
|
| 118 |
collection_id: str,
|
| 119 |
repo_types: list[str] | None = None,
|
| 120 |
-
|
| 121 |
count_only: bool = False,
|
| 122 |
where: dict[str, Any] | None = None,
|
| 123 |
fields: list[str] | None = None,
|
| 124 |
) -> dict[str, Any]:
|
| 125 |
start_calls = ctx.call_count["n"]
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
"hf_collection_items", "
|
| 129 |
)
|
| 130 |
cid = str(collection_id or "").strip()
|
| 131 |
if not cid:
|
|
@@ -135,9 +197,12 @@ async def hf_collection_items(
|
|
| 135 |
error="collection_id is required",
|
| 136 |
)
|
| 137 |
if count_only:
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
| 141 |
)
|
| 142 |
allowed_repo_types: set[str] | None = None
|
| 143 |
try:
|
|
@@ -180,7 +245,17 @@ async def hf_collection_items(
|
|
| 180 |
)
|
| 181 |
if owner is None and "/" in cid:
|
| 182 |
owner = cid.split("/", 1)[0]
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
normalized: list[dict[str, Any]] = []
|
| 185 |
for row in raw_items:
|
| 186 |
if not isinstance(row, dict):
|
|
@@ -195,9 +270,17 @@ async def hf_collection_items(
|
|
| 195 |
continue
|
| 196 |
normalized.append(item)
|
| 197 |
total_matched = len(normalized)
|
| 198 |
-
items = [] if count_only else normalized[:
|
| 199 |
-
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
return ctx._helper_success(
|
| 202 |
start_calls=start_calls,
|
| 203 |
source=endpoint,
|
|
|
|
| 4 |
from functools import partial
|
| 5 |
from typing import Any, Callable
|
| 6 |
|
| 7 |
+
from ..constants import (
|
| 8 |
+
COLLECTION_CANONICAL_FIELDS,
|
| 9 |
+
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 10 |
+
REPO_CANONICAL_FIELDS,
|
| 11 |
+
)
|
| 12 |
from ..context_types import HelperRuntimeContext
|
| 13 |
|
| 14 |
|
|
|
|
| 16 |
ctx: HelperRuntimeContext,
|
| 17 |
query: str | None = None,
|
| 18 |
owner: str | None = None,
|
| 19 |
+
limit: int = 20,
|
| 20 |
count_only: bool = False,
|
| 21 |
where: dict[str, Any] | None = None,
|
| 22 |
fields: list[str] | None = None,
|
| 23 |
) -> dict[str, Any]:
|
| 24 |
start_calls = ctx.call_count["n"]
|
| 25 |
+
default_limit = ctx._policy_int("hf_collections_search", "default_limit", 20)
|
| 26 |
+
max_limit = ctx._policy_int(
|
| 27 |
+
"hf_collections_search", "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
|
| 28 |
)
|
| 29 |
if count_only:
|
| 30 |
+
limit = 0
|
| 31 |
+
applied_limit = ctx._clamp_int(
|
| 32 |
+
limit,
|
| 33 |
+
default=default_limit,
|
| 34 |
+
minimum=0,
|
| 35 |
+
maximum=max_limit,
|
| 36 |
)
|
| 37 |
owner_clean = str(owner or "").strip() or None
|
| 38 |
+
owner_casefold = owner_clean.casefold() if owner_clean is not None else None
|
| 39 |
+
fetch_limit = max_limit if applied_limit == 0 or owner_clean else applied_limit
|
| 40 |
if owner_clean:
|
| 41 |
+
fetch_limit = min(fetch_limit, 100)
|
| 42 |
term = str(query or "").strip()
|
| 43 |
if not term and owner_clean:
|
| 44 |
term = owner_clean
|
|
|
|
| 48 |
source="/api/collections",
|
| 49 |
error="query or owner is required",
|
| 50 |
)
|
| 51 |
+
params: dict[str, Any] = {"limit": fetch_limit}
|
| 52 |
if term:
|
| 53 |
params["q"] = term
|
| 54 |
if owner_clean:
|
|
|
|
| 61 |
error=resp.get("error") or "collections fetch failed",
|
| 62 |
)
|
| 63 |
payload = resp.get("data") if isinstance(resp.get("data"), list) else []
|
| 64 |
+
|
| 65 |
+
def _row_owner_matches_owner(row: Any) -> bool:
|
| 66 |
+
if owner_casefold is None or not isinstance(row, dict):
|
| 67 |
+
return owner_casefold is None
|
| 68 |
+
row_owner = ctx._author_from_any(row.get("owner")) or ctx._author_from_any(
|
| 69 |
+
row.get("ownerData")
|
| 70 |
+
)
|
| 71 |
+
if (
|
| 72 |
+
not row_owner
|
| 73 |
+
and isinstance(row.get("slug"), str)
|
| 74 |
+
and "/" in str(row.get("slug"))
|
| 75 |
+
):
|
| 76 |
+
row_owner = str(row.get("slug")).split("/", 1)[0]
|
| 77 |
+
if not isinstance(row_owner, str) or not row_owner:
|
| 78 |
+
return False
|
| 79 |
+
return row_owner.casefold() == owner_casefold
|
| 80 |
+
|
| 81 |
+
owner_fallback_used = False
|
| 82 |
+
if owner_casefold is not None and not any(
|
| 83 |
+
_row_owner_matches_owner(row) for row in payload
|
| 84 |
+
):
|
| 85 |
+
fallback_params: dict[str, Any] = {"limit": fetch_limit}
|
| 86 |
+
if term:
|
| 87 |
+
fallback_params["q"] = term
|
| 88 |
+
fallback_resp = ctx._host_raw_call("/api/collections", params=fallback_params)
|
| 89 |
+
if fallback_resp.get("ok"):
|
| 90 |
+
fallback_payload = (
|
| 91 |
+
fallback_resp.get("data")
|
| 92 |
+
if isinstance(fallback_resp.get("data"), list)
|
| 93 |
+
else []
|
| 94 |
+
)
|
| 95 |
+
if any(_row_owner_matches_owner(row) for row in fallback_payload):
|
| 96 |
+
payload = fallback_payload
|
| 97 |
+
owner_fallback_used = True
|
| 98 |
+
|
| 99 |
items: list[dict[str, Any]] = []
|
| 100 |
+
for row in payload[:fetch_limit]:
|
| 101 |
if not isinstance(row, dict):
|
| 102 |
continue
|
| 103 |
row_owner = ctx._author_from_any(row.get("owner")) or ctx._author_from_any(
|
|
|
|
| 109 |
and "/" in str(row.get("slug"))
|
| 110 |
):
|
| 111 |
row_owner = str(row.get("slug")).split("/", 1)[0]
|
| 112 |
+
if owner_casefold is not None and (
|
| 113 |
+
not isinstance(row_owner, str) or row_owner.casefold() != owner_casefold
|
| 114 |
+
):
|
| 115 |
continue
|
| 116 |
owner_payload = row.get("owner") if isinstance(row.get("owner"), dict) else {}
|
| 117 |
collection_items = (
|
|
|
|
| 133 |
"item_count": len(collection_items),
|
| 134 |
}
|
| 135 |
)
|
| 136 |
+
try:
|
| 137 |
+
items = ctx._apply_where(
|
| 138 |
+
items, where, allowed_fields=COLLECTION_CANONICAL_FIELDS
|
| 139 |
+
)
|
| 140 |
+
except ValueError as exc:
|
| 141 |
+
return ctx._helper_error(
|
| 142 |
+
start_calls=start_calls,
|
| 143 |
+
source="/api/collections",
|
| 144 |
+
error=exc,
|
| 145 |
+
)
|
| 146 |
total_matched = len(items)
|
| 147 |
+
items = items[:applied_limit]
|
| 148 |
+
try:
|
| 149 |
+
items = ctx._project_collection_items(items, fields)
|
| 150 |
+
except ValueError as exc:
|
| 151 |
+
return ctx._helper_error(
|
| 152 |
+
start_calls=start_calls,
|
| 153 |
+
source="/api/collections",
|
| 154 |
+
error=exc,
|
| 155 |
+
)
|
| 156 |
truncated = (
|
| 157 |
+
applied_limit > 0 and total_matched > applied_limit
|
| 158 |
+
or (applied_limit == 0 and len(payload) >= fetch_limit)
|
| 159 |
)
|
| 160 |
return ctx._helper_success(
|
| 161 |
start_calls=start_calls,
|
|
|
|
| 171 |
complete=not truncated,
|
| 172 |
query=term,
|
| 173 |
owner=owner_clean,
|
| 174 |
+
owner_case_insensitive_fallback=owner_fallback_used,
|
| 175 |
)
|
| 176 |
|
| 177 |
|
|
|
|
| 179 |
ctx: HelperRuntimeContext,
|
| 180 |
collection_id: str,
|
| 181 |
repo_types: list[str] | None = None,
|
| 182 |
+
limit: int = 100,
|
| 183 |
count_only: bool = False,
|
| 184 |
where: dict[str, Any] | None = None,
|
| 185 |
fields: list[str] | None = None,
|
| 186 |
) -> dict[str, Any]:
|
| 187 |
start_calls = ctx.call_count["n"]
|
| 188 |
+
default_limit = ctx._policy_int("hf_collection_items", "default_limit", 100)
|
| 189 |
+
max_limit = ctx._policy_int(
|
| 190 |
+
"hf_collection_items", "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
|
| 191 |
)
|
| 192 |
cid = str(collection_id or "").strip()
|
| 193 |
if not cid:
|
|
|
|
| 197 |
error="collection_id is required",
|
| 198 |
)
|
| 199 |
if count_only:
|
| 200 |
+
limit = 0
|
| 201 |
+
applied_limit = ctx._clamp_int(
|
| 202 |
+
limit,
|
| 203 |
+
default=default_limit,
|
| 204 |
+
minimum=0,
|
| 205 |
+
maximum=max_limit,
|
| 206 |
)
|
| 207 |
allowed_repo_types: set[str] | None = None
|
| 208 |
try:
|
|
|
|
| 245 |
)
|
| 246 |
if owner is None and "/" in cid:
|
| 247 |
owner = cid.split("/", 1)[0]
|
| 248 |
+
try:
|
| 249 |
+
normalized_where = ctx._normalize_where(
|
| 250 |
+
where, allowed_fields=REPO_CANONICAL_FIELDS
|
| 251 |
+
)
|
| 252 |
+
except ValueError as exc:
|
| 253 |
+
return ctx._helper_error(
|
| 254 |
+
start_calls=start_calls,
|
| 255 |
+
source=endpoint,
|
| 256 |
+
error=exc,
|
| 257 |
+
collection_id=cid,
|
| 258 |
+
)
|
| 259 |
normalized: list[dict[str, Any]] = []
|
| 260 |
for row in raw_items:
|
| 261 |
if not isinstance(row, dict):
|
|
|
|
| 270 |
continue
|
| 271 |
normalized.append(item)
|
| 272 |
total_matched = len(normalized)
|
| 273 |
+
items = [] if count_only else normalized[:applied_limit]
|
| 274 |
+
try:
|
| 275 |
+
items = ctx._project_repo_items(items, fields)
|
| 276 |
+
except ValueError as exc:
|
| 277 |
+
return ctx._helper_error(
|
| 278 |
+
start_calls=start_calls,
|
| 279 |
+
source=endpoint,
|
| 280 |
+
error=exc,
|
| 281 |
+
collection_id=cid,
|
| 282 |
+
)
|
| 283 |
+
truncated = applied_limit > 0 and total_matched > applied_limit
|
| 284 |
return ctx._helper_success(
|
| 285 |
start_calls=start_calls,
|
| 286 |
source=endpoint,
|
monty_api/helpers/introspection.py
CHANGED
|
@@ -5,22 +5,14 @@ import inspect
|
|
| 5 |
from functools import partial
|
| 6 |
from typing import Any, Callable
|
| 7 |
|
| 8 |
-
from ..aliases import
|
| 9 |
-
ACTIVITY_FIELD_ALIASES,
|
| 10 |
-
ACTOR_FIELD_ALIASES,
|
| 11 |
-
COLLECTION_FIELD_ALIASES,
|
| 12 |
-
DAILY_PAPER_FIELD_ALIASES,
|
| 13 |
-
REPO_FIELD_ALIASES,
|
| 14 |
-
REPO_SORT_KEYS,
|
| 15 |
-
SORT_KEY_ALIASES,
|
| 16 |
-
USER_FIELD_ALIASES,
|
| 17 |
-
USER_LIKES_FIELD_ALIASES,
|
| 18 |
-
)
|
| 19 |
from ..constants import (
|
| 20 |
ACTIVITY_CANONICAL_FIELDS,
|
| 21 |
ACTOR_CANONICAL_FIELDS,
|
| 22 |
COLLECTION_CANONICAL_FIELDS,
|
| 23 |
DAILY_PAPER_CANONICAL_FIELDS,
|
|
|
|
|
|
|
| 24 |
DEFAULT_MAX_CALLS,
|
| 25 |
DEFAULT_TIMEOUT_SEC,
|
| 26 |
GRAPH_SCAN_LIMIT_CAP,
|
|
@@ -32,6 +24,7 @@ from ..constants import (
|
|
| 32 |
REPO_CANONICAL_FIELDS,
|
| 33 |
TRENDING_ENDPOINT_MAX_LIMIT,
|
| 34 |
USER_CANONICAL_FIELDS,
|
|
|
|
| 35 |
)
|
| 36 |
from ..context_types import HelperRuntimeContext
|
| 37 |
from ..registry import (
|
|
@@ -117,19 +110,12 @@ async def hf_runtime_capabilities(
|
|
| 117 |
"repo": list(REPO_CANONICAL_FIELDS),
|
| 118 |
"user": list(USER_CANONICAL_FIELDS),
|
| 119 |
"actor": list(ACTOR_CANONICAL_FIELDS),
|
|
|
|
| 120 |
"activity": list(ACTIVITY_CANONICAL_FIELDS),
|
| 121 |
"collection": list(COLLECTION_CANONICAL_FIELDS),
|
| 122 |
"daily_paper": list(DAILY_PAPER_CANONICAL_FIELDS),
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
"repo": dict(sorted(REPO_FIELD_ALIASES.items())),
|
| 126 |
-
"user": dict(sorted(USER_FIELD_ALIASES.items())),
|
| 127 |
-
"actor": dict(sorted(ACTOR_FIELD_ALIASES.items())),
|
| 128 |
-
"user_likes": dict(sorted(USER_LIKES_FIELD_ALIASES.items())),
|
| 129 |
-
"activity": dict(sorted(ACTIVITY_FIELD_ALIASES.items())),
|
| 130 |
-
"collection": dict(sorted(COLLECTION_FIELD_ALIASES.items())),
|
| 131 |
-
"daily_paper": dict(sorted(DAILY_PAPER_FIELD_ALIASES.items())),
|
| 132 |
-
"sort_keys": dict(sorted(SORT_KEY_ALIASES.items())),
|
| 133 |
},
|
| 134 |
"helper_defaults": {
|
| 135 |
helper_name: dict(sorted(metadata.items()))
|
|
@@ -154,23 +140,131 @@ async def hf_runtime_capabilities(
|
|
| 154 |
],
|
| 155 |
},
|
| 156 |
"repo_search": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
"parameter_contract": {
|
| 158 |
-
"
|
| 159 |
-
"meaning": "Upstream Hugging Face
|
| 160 |
-
"not_for": [
|
| 161 |
-
"arbitrary normalized row fields",
|
| 162 |
-
"local-only derived/runtime fields",
|
| 163 |
-
],
|
| 164 |
},
|
| 165 |
-
"
|
| 166 |
-
"meaning":
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
},
|
| 169 |
"fields": {
|
| 170 |
"meaning": "Select which normalized row fields are returned to the caller.",
|
| 171 |
-
"
|
| 172 |
},
|
| 173 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
"sort_keys": {
|
| 175 |
repo_type: sorted(keys)
|
| 176 |
for repo_type, keys in sorted(REPO_SORT_KEYS.items())
|
|
@@ -179,37 +273,15 @@ async def hf_runtime_capabilities(
|
|
| 179 |
repo_type: sorted(args)
|
| 180 |
for repo_type, args in sorted(REPO_SEARCH_EXTRA_ARGS.items())
|
| 181 |
},
|
| 182 |
-
"expand_values": {
|
| 183 |
-
repo_type: list(values)
|
| 184 |
-
for repo_type, values in sorted(REPO_SEARCH_ALLOWED_EXPAND.items())
|
| 185 |
-
},
|
| 186 |
"space_runtime_contract": {
|
| 187 |
"returned_field": "runtime_stage",
|
| 188 |
"full_runtime_field": "runtime",
|
| 189 |
-
"preferred_filter_channel": "
|
| 190 |
-
"
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
"where": {
|
| 196 |
-
"runtime_stage": {
|
| 197 |
-
"in": ["BUILD_ERROR", "RUNTIME_ERROR"]
|
| 198 |
-
}
|
| 199 |
-
},
|
| 200 |
-
"fields": [
|
| 201 |
-
"repo_id",
|
| 202 |
-
"author",
|
| 203 |
-
"runtime_stage",
|
| 204 |
-
"repo_url",
|
| 205 |
-
],
|
| 206 |
-
},
|
| 207 |
-
"incorrect": {
|
| 208 |
-
"repo_type": "space",
|
| 209 |
-
"filters": ["state:ERROR"],
|
| 210 |
-
"fields": ["repo_id", "author", "state", "repo_url"],
|
| 211 |
-
},
|
| 212 |
-
},
|
| 213 |
},
|
| 214 |
},
|
| 215 |
}
|
|
|
|
| 5 |
from functools import partial
|
| 6 |
from typing import Any, Callable
|
| 7 |
|
| 8 |
+
from ..aliases import REPO_SORT_KEYS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
from ..constants import (
|
| 10 |
ACTIVITY_CANONICAL_FIELDS,
|
| 11 |
ACTOR_CANONICAL_FIELDS,
|
| 12 |
COLLECTION_CANONICAL_FIELDS,
|
| 13 |
DAILY_PAPER_CANONICAL_FIELDS,
|
| 14 |
+
DISCUSSION_CANONICAL_FIELDS,
|
| 15 |
+
DISCUSSION_DETAIL_CANONICAL_FIELDS,
|
| 16 |
DEFAULT_MAX_CALLS,
|
| 17 |
DEFAULT_TIMEOUT_SEC,
|
| 18 |
GRAPH_SCAN_LIMIT_CAP,
|
|
|
|
| 24 |
REPO_CANONICAL_FIELDS,
|
| 25 |
TRENDING_ENDPOINT_MAX_LIMIT,
|
| 26 |
USER_CANONICAL_FIELDS,
|
| 27 |
+
USER_LIKES_CANONICAL_FIELDS,
|
| 28 |
)
|
| 29 |
from ..context_types import HelperRuntimeContext
|
| 30 |
from ..registry import (
|
|
|
|
| 110 |
"repo": list(REPO_CANONICAL_FIELDS),
|
| 111 |
"user": list(USER_CANONICAL_FIELDS),
|
| 112 |
"actor": list(ACTOR_CANONICAL_FIELDS),
|
| 113 |
+
"user_likes": list(USER_LIKES_CANONICAL_FIELDS),
|
| 114 |
"activity": list(ACTIVITY_CANONICAL_FIELDS),
|
| 115 |
"collection": list(COLLECTION_CANONICAL_FIELDS),
|
| 116 |
"daily_paper": list(DAILY_PAPER_CANONICAL_FIELDS),
|
| 117 |
+
"discussion": list(DISCUSSION_CANONICAL_FIELDS),
|
| 118 |
+
"discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
},
|
| 120 |
"helper_defaults": {
|
| 121 |
helper_name: dict(sorted(metadata.items()))
|
|
|
|
| 140 |
],
|
| 141 |
},
|
| 142 |
"repo_search": {
|
| 143 |
+
"helper_selection": {
|
| 144 |
+
"preferred_rule": (
|
| 145 |
+
"Prefer hf_models_search for model queries, hf_datasets_search for "
|
| 146 |
+
"dataset queries, and hf_spaces_search for space queries. Use "
|
| 147 |
+
"hf_repo_search only for intentionally cross-type search."
|
| 148 |
+
),
|
| 149 |
+
"model": "hf_models_search",
|
| 150 |
+
"dataset": "hf_datasets_search",
|
| 151 |
+
"space": "hf_spaces_search",
|
| 152 |
+
"cross_type": "hf_repo_search",
|
| 153 |
+
},
|
| 154 |
+
"can_do": [
|
| 155 |
+
"search models",
|
| 156 |
+
"search datasets",
|
| 157 |
+
"search spaces",
|
| 158 |
+
"search across multiple repo types",
|
| 159 |
+
"project selected fields",
|
| 160 |
+
"apply local post-fetch row filtering",
|
| 161 |
+
],
|
| 162 |
"parameter_contract": {
|
| 163 |
+
"search": {
|
| 164 |
+
"meaning": "Upstream Hugging Face search text.",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
},
|
| 166 |
+
"filter": {
|
| 167 |
+
"meaning": (
|
| 168 |
+
"Upstream Hugging Face filter/tag argument passed directly into "
|
| 169 |
+
"the Hub client."
|
| 170 |
+
),
|
| 171 |
+
},
|
| 172 |
+
"post_filter": {
|
| 173 |
+
"meaning": (
|
| 174 |
+
"Local predicate applied after the rows are fetched and normalized."
|
| 175 |
+
),
|
| 176 |
+
"recommended_shapes": [
|
| 177 |
+
{"runtime_stage": "RUNNING"},
|
| 178 |
+
{"runtime_stage": {"in": ["BUILD_ERROR", "RUNTIME_ERROR"]}},
|
| 179 |
+
{"downloads": {"gte": 1000}},
|
| 180 |
+
{"likes": {"lte": 5000}},
|
| 181 |
+
],
|
| 182 |
+
"prefer_for": [
|
| 183 |
+
"normalized returned fields such as runtime_stage",
|
| 184 |
+
"downloads / likes thresholds after a broad search",
|
| 185 |
+
],
|
| 186 |
+
"avoid_when": [
|
| 187 |
+
"author is already a first-class helper argument",
|
| 188 |
+
"pipeline_tag is already a first-class model-search argument",
|
| 189 |
+
"dataset_name, language, task_ids, apps, models, or datasets already have first-class helper args",
|
| 190 |
+
],
|
| 191 |
},
|
| 192 |
"fields": {
|
| 193 |
"meaning": "Select which normalized row fields are returned to the caller.",
|
| 194 |
+
"canonical_only": True,
|
| 195 |
},
|
| 196 |
},
|
| 197 |
+
"repo_type_specific_helpers": {
|
| 198 |
+
"model": {
|
| 199 |
+
"helper": "hf_models_search",
|
| 200 |
+
"preferred_params": [
|
| 201 |
+
"search",
|
| 202 |
+
"filter",
|
| 203 |
+
"author",
|
| 204 |
+
"pipeline_tag",
|
| 205 |
+
"sort",
|
| 206 |
+
"limit",
|
| 207 |
+
"expand",
|
| 208 |
+
"fields",
|
| 209 |
+
"post_filter",
|
| 210 |
+
],
|
| 211 |
+
"expand_values": list(REPO_SEARCH_ALLOWED_EXPAND["model"]),
|
| 212 |
+
},
|
| 213 |
+
"dataset": {
|
| 214 |
+
"helper": "hf_datasets_search",
|
| 215 |
+
"preferred_params": [
|
| 216 |
+
"search",
|
| 217 |
+
"filter",
|
| 218 |
+
"author",
|
| 219 |
+
"dataset_name",
|
| 220 |
+
"language",
|
| 221 |
+
"task_categories",
|
| 222 |
+
"task_ids",
|
| 223 |
+
"sort",
|
| 224 |
+
"limit",
|
| 225 |
+
"expand",
|
| 226 |
+
"fields",
|
| 227 |
+
"post_filter",
|
| 228 |
+
],
|
| 229 |
+
"expand_values": list(REPO_SEARCH_ALLOWED_EXPAND["dataset"]),
|
| 230 |
+
},
|
| 231 |
+
"space": {
|
| 232 |
+
"helper": "hf_spaces_search",
|
| 233 |
+
"preferred_params": [
|
| 234 |
+
"search",
|
| 235 |
+
"filter",
|
| 236 |
+
"author",
|
| 237 |
+
"datasets",
|
| 238 |
+
"models",
|
| 239 |
+
"linked",
|
| 240 |
+
"sort",
|
| 241 |
+
"limit",
|
| 242 |
+
"expand",
|
| 243 |
+
"fields",
|
| 244 |
+
"post_filter",
|
| 245 |
+
],
|
| 246 |
+
"expand_values": list(REPO_SEARCH_ALLOWED_EXPAND["space"]),
|
| 247 |
+
},
|
| 248 |
+
},
|
| 249 |
+
"generic_helper": {
|
| 250 |
+
"helper": "hf_repo_search",
|
| 251 |
+
"use_for": "Intentionally cross-type search only.",
|
| 252 |
+
"supports": [
|
| 253 |
+
"search",
|
| 254 |
+
"repo_type",
|
| 255 |
+
"repo_types",
|
| 256 |
+
"filter",
|
| 257 |
+
"author",
|
| 258 |
+
"sort",
|
| 259 |
+
"limit",
|
| 260 |
+
"fields",
|
| 261 |
+
"post_filter",
|
| 262 |
+
],
|
| 263 |
+
"does_not_support": [
|
| 264 |
+
"repo-type-specific knobs such as pipeline_tag or dataset_name",
|
| 265 |
+
"nested advanced routing",
|
| 266 |
+
],
|
| 267 |
+
},
|
| 268 |
"sort_keys": {
|
| 269 |
repo_type: sorted(keys)
|
| 270 |
for repo_type, keys in sorted(REPO_SORT_KEYS.items())
|
|
|
|
| 273 |
repo_type: sorted(args)
|
| 274 |
for repo_type, args in sorted(REPO_SEARCH_EXTRA_ARGS.items())
|
| 275 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
"space_runtime_contract": {
|
| 277 |
"returned_field": "runtime_stage",
|
| 278 |
"full_runtime_field": "runtime",
|
| 279 |
+
"preferred_filter_channel": "post_filter",
|
| 280 |
+
"note": (
|
| 281 |
+
"Treat runtime_stage like any other returned field: use exact values "
|
| 282 |
+
"or an 'in' list in post_filter."
|
| 283 |
+
),
|
| 284 |
+
"common_values": ["BUILD_ERROR", "RUNTIME_ERROR", "RUNNING", "SLEEPING"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
},
|
| 286 |
},
|
| 287 |
}
|
monty_api/helpers/profiles.py
CHANGED
|
@@ -5,11 +5,8 @@ from itertools import islice
|
|
| 5 |
import re
|
| 6 |
from typing import Any, Callable
|
| 7 |
from ..context_types import HelperRuntimeContext
|
| 8 |
-
from ..aliases import (
|
| 9 |
-
ACTOR_FIELD_ALIASES,
|
| 10 |
-
USER_FIELD_ALIASES,
|
| 11 |
-
)
|
| 12 |
from ..constants import (
|
|
|
|
| 13 |
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 14 |
GRAPH_SCAN_LIMIT_CAP,
|
| 15 |
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
|
@@ -74,7 +71,7 @@ async def hf_whoami(ctx: HelperRuntimeContext) -> dict[str, Any]:
|
|
| 74 |
item = {
|
| 75 |
"username": username,
|
| 76 |
"fullname": payload.get("fullname"),
|
| 77 |
-
"
|
| 78 |
}
|
| 79 |
items = [item] if isinstance(username, str) and username else []
|
| 80 |
return ctx._helper_success(
|
|
@@ -148,16 +145,16 @@ async def _hf_user_overview(ctx: HelperRuntimeContext, username: str) -> dict[st
|
|
| 148 |
"username": obj.username or u,
|
| 149 |
"fullname": obj.fullname,
|
| 150 |
"bio": getattr(obj, "details", None),
|
| 151 |
-
"
|
| 152 |
-
"
|
| 153 |
"twitter": _social_url("twitter", twitter_handle),
|
| 154 |
"github": _social_url("github", github_handle),
|
| 155 |
"linkedin": _social_url("linkedin", linkedin_handle),
|
| 156 |
"bluesky": _social_url("bluesky", bluesky_handle),
|
| 157 |
-
"
|
| 158 |
-
"
|
| 159 |
-
"
|
| 160 |
-
"
|
| 161 |
"followers": ctx._as_int(obj.num_followers),
|
| 162 |
"following": ctx._as_int(obj.num_following),
|
| 163 |
"likes": ctx._as_int(obj.num_likes),
|
|
@@ -168,7 +165,7 @@ async def _hf_user_overview(ctx: HelperRuntimeContext, username: str) -> dict[st
|
|
| 168 |
"papers": ctx._as_int(getattr(obj, "num_papers", None)),
|
| 169 |
"upvotes": ctx._as_int(getattr(obj, "num_upvotes", None)),
|
| 170 |
"orgs": org_names,
|
| 171 |
-
"
|
| 172 |
}
|
| 173 |
return ctx._helper_success(
|
| 174 |
start_calls=start_calls,
|
|
@@ -202,10 +199,10 @@ async def _hf_org_overview(
|
|
| 202 |
return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
|
| 203 |
item = {
|
| 204 |
"organization": obj.name or org,
|
| 205 |
-
"
|
| 206 |
-
"
|
| 207 |
"description": obj.details,
|
| 208 |
-
"
|
| 209 |
"followers": ctx._as_int(obj.num_followers),
|
| 210 |
"members": ctx._as_int(obj.num_users),
|
| 211 |
"models": ctx._as_int(getattr(obj, "num_models", None)),
|
|
@@ -226,7 +223,7 @@ async def _hf_org_overview(
|
|
| 226 |
async def hf_org_members(
|
| 227 |
ctx: HelperRuntimeContext,
|
| 228 |
organization: str,
|
| 229 |
-
|
| 230 |
scan_limit: int | None = None,
|
| 231 |
count_only: bool = False,
|
| 232 |
where: dict[str, Any] | None = None,
|
|
@@ -240,17 +237,17 @@ async def hf_org_members(
|
|
| 240 |
source="/api/organizations/<o>/members",
|
| 241 |
error="organization is required",
|
| 242 |
)
|
| 243 |
-
|
| 244 |
scan_cap = ctx._policy_int("hf_org_members", "scan_max", GRAPH_SCAN_LIMIT_CAP)
|
| 245 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 246 |
-
|
| 247 |
count_only=count_only,
|
| 248 |
-
|
| 249 |
-
|
| 250 |
scan_limit=scan_limit,
|
| 251 |
scan_cap=scan_cap,
|
| 252 |
)
|
| 253 |
-
|
| 254 |
scan_lim = int(limit_plan["applied_scan_limit"])
|
| 255 |
has_where = isinstance(where, dict) and bool(where)
|
| 256 |
overview_total: int | None = None
|
|
@@ -299,11 +296,21 @@ async def hf_org_members(
|
|
| 299 |
item = {
|
| 300 |
"username": handle,
|
| 301 |
"fullname": getattr(row, "fullname", None),
|
| 302 |
-
"
|
| 303 |
"role": getattr(row, "role", None),
|
| 304 |
}
|
| 305 |
normalized.append(item)
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
observed_total = len(rows)
|
| 308 |
scan_exhaustive = observed_total < scan_lim
|
| 309 |
overview_list_mismatch = (
|
|
@@ -324,14 +331,14 @@ async def hf_org_members(
|
|
| 324 |
total = observed_total
|
| 325 |
total_matched = observed_total
|
| 326 |
total_available = overview_total if overview_total is not None else observed_total
|
| 327 |
-
items = normalized[:
|
| 328 |
scan_limit_hit = not exact_count and observed_total >= scan_lim
|
| 329 |
count_source = (
|
| 330 |
"overview" if overview_total is not None and (not has_where) else "scan"
|
| 331 |
)
|
| 332 |
sample_complete = (
|
| 333 |
exact_count
|
| 334 |
-
and len(normalized) <=
|
| 335 |
and (not count_only or len(normalized) == 0)
|
| 336 |
)
|
| 337 |
more_available = ctx._derive_more_available(
|
|
@@ -342,7 +349,15 @@ async def hf_org_members(
|
|
| 342 |
)
|
| 343 |
if not exact_count and scan_limit_hit:
|
| 344 |
more_available = "unknown" if has_where else True
|
| 345 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
meta = ctx._build_exhaustive_result_meta(
|
| 347 |
base_meta={
|
| 348 |
"scanned": observed_total,
|
|
@@ -375,7 +390,7 @@ async def _user_graph_helper(
|
|
| 375 |
kind: str,
|
| 376 |
username: str,
|
| 377 |
pro_only: bool | None,
|
| 378 |
-
|
| 379 |
scan_limit: int | None,
|
| 380 |
count_only: bool,
|
| 381 |
where: dict[str, Any] | None,
|
|
@@ -384,10 +399,10 @@ async def _user_graph_helper(
|
|
| 384 |
helper_name: str,
|
| 385 |
) -> dict[str, Any]:
|
| 386 |
start_calls = ctx.call_count["n"]
|
| 387 |
-
|
| 388 |
scan_cap = ctx._policy_int(helper_name, "scan_max", GRAPH_SCAN_LIMIT_CAP)
|
| 389 |
-
|
| 390 |
-
helper_name, "
|
| 391 |
)
|
| 392 |
u = str(username or "").strip()
|
| 393 |
if not u:
|
|
@@ -397,14 +412,14 @@ async def _user_graph_helper(
|
|
| 397 |
error="username is required",
|
| 398 |
)
|
| 399 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 400 |
-
|
| 401 |
count_only=count_only,
|
| 402 |
-
|
| 403 |
-
|
| 404 |
scan_limit=scan_limit,
|
| 405 |
scan_cap=scan_cap,
|
| 406 |
)
|
| 407 |
-
|
| 408 |
scan_lim = int(limit_plan["applied_scan_limit"])
|
| 409 |
has_where = isinstance(where, dict) and bool(where)
|
| 410 |
filtered = pro_only is not None or has_where
|
|
@@ -509,14 +524,28 @@ async def _user_graph_helper(
|
|
| 509 |
item = {
|
| 510 |
"username": handle,
|
| 511 |
"fullname": getattr(row, "fullname", None),
|
| 512 |
-
"
|
| 513 |
}
|
| 514 |
-
if pro_only is True and item.get("
|
| 515 |
continue
|
| 516 |
-
if pro_only is False and item.get("
|
| 517 |
continue
|
| 518 |
normalized.append(item)
|
| 519 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 520 |
observed_total = len(rows)
|
| 521 |
scan_exhaustive = observed_total < scan_lim
|
| 522 |
overview_list_mismatch = (
|
|
@@ -537,14 +566,14 @@ async def _user_graph_helper(
|
|
| 537 |
total = observed_total
|
| 538 |
total_matched = observed_total
|
| 539 |
total_available = overview_total if overview_total is not None else observed_total
|
| 540 |
-
items = normalized[:
|
| 541 |
scan_limit_hit = not exact_count and observed_total >= scan_lim
|
| 542 |
count_source = (
|
| 543 |
"overview" if overview_total is not None and (not filtered) else "scan"
|
| 544 |
)
|
| 545 |
sample_complete = (
|
| 546 |
exact_count
|
| 547 |
-
and len(normalized) <=
|
| 548 |
and (not count_only or len(normalized) == 0)
|
| 549 |
)
|
| 550 |
more_available = ctx._derive_more_available(
|
|
@@ -555,7 +584,19 @@ async def _user_graph_helper(
|
|
| 555 |
)
|
| 556 |
if not exact_count and scan_limit_hit:
|
| 557 |
more_available = "unknown" if filtered else True
|
| 558 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
meta = ctx._build_exhaustive_result_meta(
|
| 560 |
base_meta={
|
| 561 |
"scanned": observed_total,
|
|
@@ -645,8 +686,8 @@ async def hf_profile_summary(
|
|
| 645 |
"display_name": overview_item.get("fullname")
|
| 646 |
or str(overview_item.get("username") or resolved_handle),
|
| 647 |
"bio": overview_item.get("bio"),
|
| 648 |
-
"avatar_url": overview_item.get("
|
| 649 |
-
"website_url": overview_item.get("
|
| 650 |
"twitter_url": overview_item.get("twitter"),
|
| 651 |
"github_url": overview_item.get("github"),
|
| 652 |
"linkedin_url": overview_item.get("linkedin"),
|
|
@@ -661,13 +702,13 @@ async def hf_profile_summary(
|
|
| 661 |
"papers_count": ctx._overview_count(overview_item, "papers"),
|
| 662 |
"upvotes_count": ctx._overview_count(overview_item, "upvotes"),
|
| 663 |
"organizations": overview_item.get("orgs"),
|
| 664 |
-
"is_pro": overview_item.get("
|
| 665 |
}
|
| 666 |
if "likes" in requested_sections:
|
| 667 |
likes = await ctx.call_helper(
|
| 668 |
"hf_user_likes",
|
| 669 |
username=resolved_handle,
|
| 670 |
-
|
| 671 |
scan_limit=USER_SUMMARY_LIKES_SCAN_LIMIT,
|
| 672 |
count_only=likes_lim == 0,
|
| 673 |
sort="liked_at",
|
|
@@ -689,7 +730,7 @@ async def hf_profile_summary(
|
|
| 689 |
"hf_recent_activity",
|
| 690 |
feed_type="user",
|
| 691 |
entity=resolved_handle,
|
| 692 |
-
|
| 693 |
max_pages=USER_SUMMARY_ACTIVITY_MAX_PAGES,
|
| 694 |
count_only=activity_lim == 0,
|
| 695 |
fields=["timestamp", "event_type", "repo_type", "repo_id"],
|
|
@@ -724,11 +765,11 @@ async def hf_profile_summary(
|
|
| 724 |
item = {
|
| 725 |
"handle": str(overview_item.get("organization") or resolved_handle),
|
| 726 |
"entity_type": "organization",
|
| 727 |
-
"display_name": overview_item.get("
|
| 728 |
or str(overview_item.get("organization") or resolved_handle),
|
| 729 |
"description": overview_item.get("description"),
|
| 730 |
-
"avatar_url": overview_item.get("
|
| 731 |
-
"website_url": overview_item.get("
|
| 732 |
"followers_count": ctx._overview_count(overview_item, "followers"),
|
| 733 |
"members_count": ctx._overview_count(overview_item, "members"),
|
| 734 |
"models_count": ctx._overview_count(overview_item, "models"),
|
|
@@ -765,7 +806,7 @@ async def hf_user_graph(
|
|
| 765 |
ctx: HelperRuntimeContext,
|
| 766 |
username: str | None = None,
|
| 767 |
relation: str = "followers",
|
| 768 |
-
|
| 769 |
scan_limit: int | None = None,
|
| 770 |
count_only: bool = False,
|
| 771 |
pro_only: bool | None = None,
|
|
@@ -800,7 +841,7 @@ async def hf_user_graph(
|
|
| 800 |
rel,
|
| 801 |
resolved_username,
|
| 802 |
pro_only,
|
| 803 |
-
|
| 804 |
scan_limit,
|
| 805 |
count_only,
|
| 806 |
where,
|
|
|
|
| 5 |
import re
|
| 6 |
from typing import Any, Callable
|
| 7 |
from ..context_types import HelperRuntimeContext
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from ..constants import (
|
| 9 |
+
ACTOR_CANONICAL_FIELDS,
|
| 10 |
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 11 |
GRAPH_SCAN_LIMIT_CAP,
|
| 12 |
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
|
|
|
| 71 |
item = {
|
| 72 |
"username": username,
|
| 73 |
"fullname": payload.get("fullname"),
|
| 74 |
+
"is_pro": payload.get("isPro"),
|
| 75 |
}
|
| 76 |
items = [item] if isinstance(username, str) and username else []
|
| 77 |
return ctx._helper_success(
|
|
|
|
| 145 |
"username": obj.username or u,
|
| 146 |
"fullname": obj.fullname,
|
| 147 |
"bio": getattr(obj, "details", None),
|
| 148 |
+
"avatar_url": obj.avatar_url,
|
| 149 |
+
"website_url": getattr(obj, "websiteUrl", None),
|
| 150 |
"twitter": _social_url("twitter", twitter_handle),
|
| 151 |
"github": _social_url("github", github_handle),
|
| 152 |
"linkedin": _social_url("linkedin", linkedin_handle),
|
| 153 |
"bluesky": _social_url("bluesky", bluesky_handle),
|
| 154 |
+
"twitter_handle": twitter_handle,
|
| 155 |
+
"github_handle": github_handle,
|
| 156 |
+
"linkedin_handle": linkedin_handle,
|
| 157 |
+
"bluesky_handle": bluesky_handle,
|
| 158 |
"followers": ctx._as_int(obj.num_followers),
|
| 159 |
"following": ctx._as_int(obj.num_following),
|
| 160 |
"likes": ctx._as_int(obj.num_likes),
|
|
|
|
| 165 |
"papers": ctx._as_int(getattr(obj, "num_papers", None)),
|
| 166 |
"upvotes": ctx._as_int(getattr(obj, "num_upvotes", None)),
|
| 167 |
"orgs": org_names,
|
| 168 |
+
"is_pro": obj.is_pro,
|
| 169 |
}
|
| 170 |
return ctx._helper_success(
|
| 171 |
start_calls=start_calls,
|
|
|
|
| 199 |
return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
|
| 200 |
item = {
|
| 201 |
"organization": obj.name or org,
|
| 202 |
+
"display_name": obj.fullname,
|
| 203 |
+
"avatar_url": obj.avatar_url,
|
| 204 |
"description": obj.details,
|
| 205 |
+
"website_url": getattr(obj, "websiteUrl", None),
|
| 206 |
"followers": ctx._as_int(obj.num_followers),
|
| 207 |
"members": ctx._as_int(obj.num_users),
|
| 208 |
"models": ctx._as_int(getattr(obj, "num_models", None)),
|
|
|
|
| 223 |
async def hf_org_members(
|
| 224 |
ctx: HelperRuntimeContext,
|
| 225 |
organization: str,
|
| 226 |
+
limit: int | None = None,
|
| 227 |
scan_limit: int | None = None,
|
| 228 |
count_only: bool = False,
|
| 229 |
where: dict[str, Any] | None = None,
|
|
|
|
| 237 |
source="/api/organizations/<o>/members",
|
| 238 |
error="organization is required",
|
| 239 |
)
|
| 240 |
+
default_limit = ctx._policy_int("hf_org_members", "default_limit", 100)
|
| 241 |
scan_cap = ctx._policy_int("hf_org_members", "scan_max", GRAPH_SCAN_LIMIT_CAP)
|
| 242 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 243 |
+
limit=limit,
|
| 244 |
count_only=count_only,
|
| 245 |
+
default_limit=default_limit,
|
| 246 |
+
max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 247 |
scan_limit=scan_limit,
|
| 248 |
scan_cap=scan_cap,
|
| 249 |
)
|
| 250 |
+
applied_limit = int(limit_plan["applied_limit"])
|
| 251 |
scan_lim = int(limit_plan["applied_scan_limit"])
|
| 252 |
has_where = isinstance(where, dict) and bool(where)
|
| 253 |
overview_total: int | None = None
|
|
|
|
| 296 |
item = {
|
| 297 |
"username": handle,
|
| 298 |
"fullname": getattr(row, "fullname", None),
|
| 299 |
+
"is_pro": getattr(row, "is_pro", None),
|
| 300 |
"role": getattr(row, "role", None),
|
| 301 |
}
|
| 302 |
normalized.append(item)
|
| 303 |
+
try:
|
| 304 |
+
normalized = ctx._apply_where(
|
| 305 |
+
normalized, where, allowed_fields=ACTOR_CANONICAL_FIELDS
|
| 306 |
+
)
|
| 307 |
+
except ValueError as exc:
|
| 308 |
+
return ctx._helper_error(
|
| 309 |
+
start_calls=start_calls,
|
| 310 |
+
source=endpoint,
|
| 311 |
+
error=exc,
|
| 312 |
+
organization=org,
|
| 313 |
+
)
|
| 314 |
observed_total = len(rows)
|
| 315 |
scan_exhaustive = observed_total < scan_lim
|
| 316 |
overview_list_mismatch = (
|
|
|
|
| 331 |
total = observed_total
|
| 332 |
total_matched = observed_total
|
| 333 |
total_available = overview_total if overview_total is not None else observed_total
|
| 334 |
+
items = normalized[:applied_limit]
|
| 335 |
scan_limit_hit = not exact_count and observed_total >= scan_lim
|
| 336 |
count_source = (
|
| 337 |
"overview" if overview_total is not None and (not has_where) else "scan"
|
| 338 |
)
|
| 339 |
sample_complete = (
|
| 340 |
exact_count
|
| 341 |
+
and len(normalized) <= applied_limit
|
| 342 |
and (not count_only or len(normalized) == 0)
|
| 343 |
)
|
| 344 |
more_available = ctx._derive_more_available(
|
|
|
|
| 349 |
)
|
| 350 |
if not exact_count and scan_limit_hit:
|
| 351 |
more_available = "unknown" if has_where else True
|
| 352 |
+
try:
|
| 353 |
+
items = ctx._project_actor_items(items, fields)
|
| 354 |
+
except ValueError as exc:
|
| 355 |
+
return ctx._helper_error(
|
| 356 |
+
start_calls=start_calls,
|
| 357 |
+
source=endpoint,
|
| 358 |
+
error=exc,
|
| 359 |
+
organization=org,
|
| 360 |
+
)
|
| 361 |
meta = ctx._build_exhaustive_result_meta(
|
| 362 |
base_meta={
|
| 363 |
"scanned": observed_total,
|
|
|
|
| 390 |
kind: str,
|
| 391 |
username: str,
|
| 392 |
pro_only: bool | None,
|
| 393 |
+
limit: int | None,
|
| 394 |
scan_limit: int | None,
|
| 395 |
count_only: bool,
|
| 396 |
where: dict[str, Any] | None,
|
|
|
|
| 399 |
helper_name: str,
|
| 400 |
) -> dict[str, Any]:
|
| 401 |
start_calls = ctx.call_count["n"]
|
| 402 |
+
default_limit = ctx._policy_int(helper_name, "default_limit", 100)
|
| 403 |
scan_cap = ctx._policy_int(helper_name, "scan_max", GRAPH_SCAN_LIMIT_CAP)
|
| 404 |
+
max_limit = ctx._policy_int(
|
| 405 |
+
helper_name, "max_limit", EXHAUSTIVE_HELPER_RETURN_HARD_CAP
|
| 406 |
)
|
| 407 |
u = str(username or "").strip()
|
| 408 |
if not u:
|
|
|
|
| 412 |
error="username is required",
|
| 413 |
)
|
| 414 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 415 |
+
limit=limit,
|
| 416 |
count_only=count_only,
|
| 417 |
+
default_limit=default_limit,
|
| 418 |
+
max_limit=max_limit,
|
| 419 |
scan_limit=scan_limit,
|
| 420 |
scan_cap=scan_cap,
|
| 421 |
)
|
| 422 |
+
applied_limit = int(limit_plan["applied_limit"])
|
| 423 |
scan_lim = int(limit_plan["applied_scan_limit"])
|
| 424 |
has_where = isinstance(where, dict) and bool(where)
|
| 425 |
filtered = pro_only is not None or has_where
|
|
|
|
| 524 |
item = {
|
| 525 |
"username": handle,
|
| 526 |
"fullname": getattr(row, "fullname", None),
|
| 527 |
+
"is_pro": getattr(row, "is_pro", None),
|
| 528 |
}
|
| 529 |
+
if pro_only is True and item.get("is_pro") is not True:
|
| 530 |
continue
|
| 531 |
+
if pro_only is False and item.get("is_pro") is True:
|
| 532 |
continue
|
| 533 |
normalized.append(item)
|
| 534 |
+
try:
|
| 535 |
+
normalized = ctx._apply_where(
|
| 536 |
+
normalized, where, allowed_fields=ACTOR_CANONICAL_FIELDS
|
| 537 |
+
)
|
| 538 |
+
except ValueError as exc:
|
| 539 |
+
return ctx._helper_error(
|
| 540 |
+
start_calls=start_calls,
|
| 541 |
+
source=endpoint,
|
| 542 |
+
error=exc,
|
| 543 |
+
relation=kind,
|
| 544 |
+
username=u,
|
| 545 |
+
entity=u,
|
| 546 |
+
entity_type=entity_type,
|
| 547 |
+
organization=u if entity_type == "organization" else None,
|
| 548 |
+
)
|
| 549 |
observed_total = len(rows)
|
| 550 |
scan_exhaustive = observed_total < scan_lim
|
| 551 |
overview_list_mismatch = (
|
|
|
|
| 566 |
total = observed_total
|
| 567 |
total_matched = observed_total
|
| 568 |
total_available = overview_total if overview_total is not None else observed_total
|
| 569 |
+
items = normalized[:applied_limit]
|
| 570 |
scan_limit_hit = not exact_count and observed_total >= scan_lim
|
| 571 |
count_source = (
|
| 572 |
"overview" if overview_total is not None and (not filtered) else "scan"
|
| 573 |
)
|
| 574 |
sample_complete = (
|
| 575 |
exact_count
|
| 576 |
+
and len(normalized) <= applied_limit
|
| 577 |
and (not count_only or len(normalized) == 0)
|
| 578 |
)
|
| 579 |
more_available = ctx._derive_more_available(
|
|
|
|
| 584 |
)
|
| 585 |
if not exact_count and scan_limit_hit:
|
| 586 |
more_available = "unknown" if filtered else True
|
| 587 |
+
try:
|
| 588 |
+
items = ctx._project_actor_items(items, fields)
|
| 589 |
+
except ValueError as exc:
|
| 590 |
+
return ctx._helper_error(
|
| 591 |
+
start_calls=start_calls,
|
| 592 |
+
source=endpoint,
|
| 593 |
+
error=exc,
|
| 594 |
+
relation=kind,
|
| 595 |
+
username=u,
|
| 596 |
+
entity=u,
|
| 597 |
+
entity_type=entity_type,
|
| 598 |
+
organization=u if entity_type == "organization" else None,
|
| 599 |
+
)
|
| 600 |
meta = ctx._build_exhaustive_result_meta(
|
| 601 |
base_meta={
|
| 602 |
"scanned": observed_total,
|
|
|
|
| 686 |
"display_name": overview_item.get("fullname")
|
| 687 |
or str(overview_item.get("username") or resolved_handle),
|
| 688 |
"bio": overview_item.get("bio"),
|
| 689 |
+
"avatar_url": overview_item.get("avatar_url"),
|
| 690 |
+
"website_url": overview_item.get("website_url"),
|
| 691 |
"twitter_url": overview_item.get("twitter"),
|
| 692 |
"github_url": overview_item.get("github"),
|
| 693 |
"linkedin_url": overview_item.get("linkedin"),
|
|
|
|
| 702 |
"papers_count": ctx._overview_count(overview_item, "papers"),
|
| 703 |
"upvotes_count": ctx._overview_count(overview_item, "upvotes"),
|
| 704 |
"organizations": overview_item.get("orgs"),
|
| 705 |
+
"is_pro": overview_item.get("is_pro"),
|
| 706 |
}
|
| 707 |
if "likes" in requested_sections:
|
| 708 |
likes = await ctx.call_helper(
|
| 709 |
"hf_user_likes",
|
| 710 |
username=resolved_handle,
|
| 711 |
+
limit=likes_lim,
|
| 712 |
scan_limit=USER_SUMMARY_LIKES_SCAN_LIMIT,
|
| 713 |
count_only=likes_lim == 0,
|
| 714 |
sort="liked_at",
|
|
|
|
| 730 |
"hf_recent_activity",
|
| 731 |
feed_type="user",
|
| 732 |
entity=resolved_handle,
|
| 733 |
+
limit=activity_lim,
|
| 734 |
max_pages=USER_SUMMARY_ACTIVITY_MAX_PAGES,
|
| 735 |
count_only=activity_lim == 0,
|
| 736 |
fields=["timestamp", "event_type", "repo_type", "repo_id"],
|
|
|
|
| 765 |
item = {
|
| 766 |
"handle": str(overview_item.get("organization") or resolved_handle),
|
| 767 |
"entity_type": "organization",
|
| 768 |
+
"display_name": overview_item.get("display_name")
|
| 769 |
or str(overview_item.get("organization") or resolved_handle),
|
| 770 |
"description": overview_item.get("description"),
|
| 771 |
+
"avatar_url": overview_item.get("avatar_url"),
|
| 772 |
+
"website_url": overview_item.get("website_url"),
|
| 773 |
"followers_count": ctx._overview_count(overview_item, "followers"),
|
| 774 |
"members_count": ctx._overview_count(overview_item, "members"),
|
| 775 |
"models_count": ctx._overview_count(overview_item, "models"),
|
|
|
|
| 806 |
ctx: HelperRuntimeContext,
|
| 807 |
username: str | None = None,
|
| 808 |
relation: str = "followers",
|
| 809 |
+
limit: int | None = None,
|
| 810 |
scan_limit: int | None = None,
|
| 811 |
count_only: bool = False,
|
| 812 |
pro_only: bool | None = None,
|
|
|
|
| 841 |
rel,
|
| 842 |
resolved_username,
|
| 843 |
pro_only,
|
| 844 |
+
limit,
|
| 845 |
scan_limit,
|
| 846 |
count_only,
|
| 847 |
where,
|
monty_api/helpers/repos.py
CHANGED
|
@@ -5,20 +5,18 @@ from itertools import islice
|
|
| 5 |
from typing import Any, Callable
|
| 6 |
from huggingface_hub import HfApi
|
| 7 |
from ..context_types import HelperRuntimeContext
|
| 8 |
-
from ..aliases import (
|
| 9 |
-
ACTOR_FIELD_ALIASES,
|
| 10 |
-
DAILY_PAPER_FIELD_ALIASES,
|
| 11 |
-
REPO_FIELD_ALIASES,
|
| 12 |
-
USER_LIKES_FIELD_ALIASES,
|
| 13 |
-
)
|
| 14 |
from ..constants import (
|
|
|
|
|
|
|
| 15 |
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 16 |
LIKES_ENRICHMENT_MAX_REPOS,
|
| 17 |
LIKES_RANKING_WINDOW_DEFAULT,
|
| 18 |
LIKES_SCAN_LIMIT_CAP,
|
| 19 |
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
|
|
|
| 20 |
SELECTIVE_ENDPOINT_RETURN_HARD_CAP,
|
| 21 |
TRENDING_ENDPOINT_MAX_LIMIT,
|
|
|
|
| 22 |
)
|
| 23 |
from ..registry import (
|
| 24 |
REPO_SEARCH_ALLOWED_EXPAND,
|
|
@@ -42,11 +40,7 @@ def _sanitize_repo_expand_values(
|
|
| 42 |
elif isinstance(raw_expand, (list, tuple, set)):
|
| 43 |
requested_values = list(raw_expand)
|
| 44 |
else:
|
| 45 |
-
return (
|
| 46 |
-
None,
|
| 47 |
-
[],
|
| 48 |
-
"advanced['expand'] must be a string or a list of strings",
|
| 49 |
-
)
|
| 50 |
|
| 51 |
cleaned: list[str] = []
|
| 52 |
for value in requested_values:
|
|
@@ -60,254 +54,259 @@ def _sanitize_repo_expand_values(
|
|
| 60 |
return (kept or None, dropped, None)
|
| 61 |
|
| 62 |
|
| 63 |
-
def
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
|
| 76 |
-
author_value = remaining_where.get("author")
|
| 77 |
-
if normalized_author is None and isinstance(author_value, str) and author_value.strip():
|
| 78 |
-
normalized_author = author_value.strip()
|
| 79 |
-
promoted["author"] = normalized_author
|
| 80 |
-
remaining_where.pop("author", None)
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
|
| 92 |
def _normalize_user_likes_sort(sort: str | None) -> tuple[str | None, str | None]:
|
| 93 |
-
|
| 94 |
-
alias_map = {
|
| 95 |
-
"": "liked_at",
|
| 96 |
-
"likedat": "liked_at",
|
| 97 |
-
"liked_at": "liked_at",
|
| 98 |
-
"liked-at": "liked_at",
|
| 99 |
-
"recency": "liked_at",
|
| 100 |
-
"repolikes": "repo_likes",
|
| 101 |
-
"repo_likes": "repo_likes",
|
| 102 |
-
"repo-likes": "repo_likes",
|
| 103 |
-
"repodownloads": "repo_downloads",
|
| 104 |
-
"repo_downloads": "repo_downloads",
|
| 105 |
-
"repo-downloads": "repo_downloads",
|
| 106 |
-
}
|
| 107 |
-
normalized = alias_map.get(raw.lower(), raw)
|
| 108 |
if normalized not in {"liked_at", "repo_likes", "repo_downloads"}:
|
| 109 |
return (None, "sort must be one of liked_at, repo_likes, repo_downloads")
|
| 110 |
return (normalized, None)
|
| 111 |
|
| 112 |
|
| 113 |
-
async def
|
| 114 |
ctx: HelperRuntimeContext,
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
fields: list[str] | None
|
| 124 |
-
|
|
|
|
| 125 |
) -> dict[str, Any]:
|
| 126 |
start_calls = ctx.call_count["n"]
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
)
|
| 131 |
-
|
|
|
|
| 132 |
return ctx._helper_error(
|
| 133 |
start_calls=start_calls,
|
| 134 |
source="/api/repos",
|
| 135 |
-
error=
|
| 136 |
)
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
else:
|
| 141 |
-
rt = ctx._canonical_repo_type(repo_type, default="")
|
| 142 |
-
if rt not in {"model", "dataset", "space"}:
|
| 143 |
-
return ctx._helper_error(
|
| 144 |
-
start_calls=start_calls,
|
| 145 |
-
source="/api/repos",
|
| 146 |
-
error=f"Unsupported repo_type '{repo_type}'",
|
| 147 |
-
)
|
| 148 |
-
requested_repo_types = [rt]
|
| 149 |
-
else:
|
| 150 |
-
raw_types = ctx._coerce_str_list(repo_types)
|
| 151 |
-
if not raw_types:
|
| 152 |
-
return ctx._helper_error(
|
| 153 |
-
start_calls=start_calls,
|
| 154 |
-
source="/api/repos",
|
| 155 |
-
error="repo_types must not be empty",
|
| 156 |
-
)
|
| 157 |
-
requested_repo_types: list[str] = []
|
| 158 |
-
for raw in raw_types:
|
| 159 |
-
rt = ctx._canonical_repo_type(raw, default="")
|
| 160 |
-
if rt not in {"model", "dataset", "space"}:
|
| 161 |
-
return ctx._helper_error(
|
| 162 |
-
start_calls=start_calls,
|
| 163 |
-
source="/api/repos",
|
| 164 |
-
error=f"Unsupported repo_type '{raw}'",
|
| 165 |
-
)
|
| 166 |
-
requested_repo_types.append(rt)
|
| 167 |
-
filter_list = ctx._coerce_str_list(filters)
|
| 168 |
-
term = str(query or "").strip()
|
| 169 |
-
author_clean = str(author or "").strip() or None
|
| 170 |
requested_limit = limit
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
limit_meta = ctx._derive_limit_metadata(
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
default_limit_used=limit ==
|
| 176 |
)
|
| 177 |
hard_cap_applied = bool(limit_meta.get("hard_cap_applied"))
|
| 178 |
-
|
| 179 |
-
return ctx._helper_error(
|
| 180 |
-
start_calls=start_calls,
|
| 181 |
-
source="/api/repos",
|
| 182 |
-
error="advanced must be a dict when provided",
|
| 183 |
-
)
|
| 184 |
-
if advanced is not None and len(requested_repo_types) != 1:
|
| 185 |
-
return ctx._helper_error(
|
| 186 |
-
start_calls=start_calls,
|
| 187 |
-
source="/api/repos",
|
| 188 |
-
error="advanced may only be used with a single repo_type",
|
| 189 |
-
)
|
| 190 |
sort_keys: dict[str, str | None] = {}
|
| 191 |
-
for
|
| 192 |
-
sort_key, sort_error = ctx._normalize_repo_sort_key(
|
| 193 |
if sort_error:
|
| 194 |
return ctx._helper_error(
|
| 195 |
-
start_calls=start_calls,
|
|
|
|
|
|
|
| 196 |
)
|
| 197 |
-
sort_keys[
|
|
|
|
| 198 |
all_items: list[dict[str, Any]] = []
|
| 199 |
scanned = 0
|
| 200 |
source_endpoints: list[str] = []
|
| 201 |
limit_boundary_hit = False
|
| 202 |
ignored_expand: dict[str, list[str]] = {}
|
| 203 |
-
promoted_where_filters: dict[str, Any] = {}
|
| 204 |
-
effective_where = dict(where) if isinstance(where, dict) else where
|
| 205 |
api = ctx._get_hf_api_client()
|
| 206 |
-
|
| 207 |
-
|
|
|
|
| 208 |
source_endpoints.append(endpoint)
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
(str(k) for k in extra_args.keys() if str(k) not in allowed_extra)
|
| 214 |
)
|
| 215 |
-
if
|
| 216 |
return ctx._helper_error(
|
| 217 |
start_calls=start_calls,
|
| 218 |
source=endpoint,
|
| 219 |
-
error=
|
| 220 |
-
)
|
| 221 |
-
if "card_data" in extra_args and "cardData" not in extra_args:
|
| 222 |
-
extra_args["cardData"] = extra_args.pop("card_data")
|
| 223 |
-
else:
|
| 224 |
-
extra_args.pop("card_data", None)
|
| 225 |
-
if len(requested_repo_types) == 1 and isinstance(effective_where, dict):
|
| 226 |
-
(
|
| 227 |
-
effective_author,
|
| 228 |
-
extra_args,
|
| 229 |
-
effective_where,
|
| 230 |
-
promoted_where_filters,
|
| 231 |
-
) = _promote_repo_search_constraints(
|
| 232 |
-
rt,
|
| 233 |
-
effective_author,
|
| 234 |
-
extra_args,
|
| 235 |
-
effective_where,
|
| 236 |
-
)
|
| 237 |
-
if "expand" in extra_args:
|
| 238 |
-
normalized_expand, dropped_expand, expand_error = (
|
| 239 |
-
_sanitize_repo_expand_values(rt, extra_args.get("expand"))
|
| 240 |
)
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
start_calls=start_calls,
|
| 244 |
-
source=endpoint,
|
| 245 |
-
error=expand_error,
|
| 246 |
-
)
|
| 247 |
-
if dropped_expand:
|
| 248 |
-
ignored_expand[rt] = dropped_expand
|
| 249 |
-
if normalized_expand is None:
|
| 250 |
-
extra_args.pop("expand", None)
|
| 251 |
-
else:
|
| 252 |
-
extra_args["expand"] = normalized_expand
|
| 253 |
-
if not any(
|
| 254 |
-
(
|
| 255 |
-
key in extra_args
|
| 256 |
-
for key in ("expand", "full", "cardData", "fetch_config")
|
| 257 |
-
)
|
| 258 |
-
):
|
| 259 |
-
extra_args["expand"] = list(REPO_SEARCH_DEFAULT_EXPAND[rt])
|
| 260 |
try:
|
| 261 |
payload = ctx._host_hf_call(
|
| 262 |
endpoint,
|
| 263 |
-
lambda
|
| 264 |
api,
|
| 265 |
-
|
| 266 |
-
search=term
|
| 267 |
-
author=
|
| 268 |
-
filter=filter_list
|
| 269 |
-
sort=sort_keys[
|
| 270 |
-
limit=
|
| 271 |
**extra_args,
|
| 272 |
),
|
| 273 |
)
|
| 274 |
except Exception as e:
|
| 275 |
return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
|
| 276 |
scanned += len(payload)
|
| 277 |
-
if len(payload) >=
|
| 278 |
limit_boundary_hit = True
|
| 279 |
all_items.extend(
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
)
|
| 282 |
-
all_items = ctx._apply_where(all_items, effective_where, aliases=REPO_FIELD_ALIASES)
|
| 283 |
combined_sort_key = next(iter(sort_keys.values()), None)
|
| 284 |
all_items = ctx._sort_repo_rows(all_items, combined_sort_key)
|
| 285 |
matched = len(all_items)
|
| 286 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
more_available: bool | str = False
|
| 288 |
truncated = False
|
| 289 |
truncated_by = "none"
|
| 290 |
next_request_hint: str | None = None
|
| 291 |
-
if hard_cap_applied and scanned >=
|
| 292 |
truncated = True
|
| 293 |
truncated_by = "hard_cap"
|
| 294 |
more_available = "unknown"
|
| 295 |
-
next_request_hint = f"Increase limit above {
|
| 296 |
elif limit_boundary_hit:
|
| 297 |
more_available = "unknown"
|
| 298 |
next_request_hint = (
|
| 299 |
-
f"Increase limit above {
|
| 300 |
)
|
|
|
|
| 301 |
return ctx._helper_success(
|
| 302 |
start_calls=start_calls,
|
| 303 |
source=",".join(source_endpoints),
|
| 304 |
items=all_items,
|
| 305 |
-
|
|
|
|
| 306 |
repo_types=requested_repo_types,
|
| 307 |
-
|
| 308 |
sort=combined_sort_key,
|
| 309 |
author=author_clean,
|
| 310 |
-
limit=
|
|
|
|
| 311 |
scanned=scanned,
|
| 312 |
matched=matched,
|
| 313 |
returned=len(all_items),
|
|
@@ -317,16 +316,199 @@ async def hf_repo_search(
|
|
| 317 |
limit_boundary_hit=limit_boundary_hit,
|
| 318 |
next_request_hint=next_request_hint,
|
| 319 |
ignored_expand=ignored_expand or None,
|
| 320 |
-
promoted_where_filters=promoted_where_filters or None,
|
| 321 |
**limit_meta,
|
| 322 |
)
|
| 323 |
|
| 324 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
async def hf_user_likes(
|
| 326 |
ctx: HelperRuntimeContext,
|
| 327 |
username: str | None = None,
|
| 328 |
repo_types: list[str] | None = None,
|
| 329 |
-
|
| 330 |
scan_limit: int | None = None,
|
| 331 |
count_only: bool = False,
|
| 332 |
where: dict[str, Any] | None = None,
|
|
@@ -335,7 +517,7 @@ async def hf_user_likes(
|
|
| 335 |
ranking_window: int | None = None,
|
| 336 |
) -> dict[str, Any]:
|
| 337 |
start_calls = ctx.call_count["n"]
|
| 338 |
-
|
| 339 |
scan_cap = ctx._policy_int("hf_user_likes", "scan_max", LIKES_SCAN_LIMIT_CAP)
|
| 340 |
ranking_default = ctx._policy_int(
|
| 341 |
"hf_user_likes", "ranking_default", LIKES_RANKING_WINDOW_DEFAULT
|
|
@@ -370,16 +552,25 @@ async def hf_user_likes(
|
|
| 370 |
error="sort must be one of liked_at, repo_likes, repo_downloads",
|
| 371 |
)
|
| 372 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 373 |
-
|
| 374 |
count_only=count_only,
|
| 375 |
-
|
| 376 |
-
|
| 377 |
scan_limit=scan_limit,
|
| 378 |
scan_cap=scan_cap,
|
| 379 |
)
|
| 380 |
-
|
| 381 |
scan_lim = int(limit_plan["applied_scan_limit"])
|
| 382 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
allowed_repo_types: set[str] | None = None
|
| 384 |
try:
|
| 385 |
raw_repo_types: list[str] = (
|
|
@@ -456,7 +647,7 @@ async def hf_user_likes(
|
|
| 456 |
selected_pairs = []
|
| 457 |
ranking_complete = False if matched > 0 else exact_count
|
| 458 |
elif sort_key == "liked_at":
|
| 459 |
-
selected_pairs = matched_rows[:
|
| 460 |
else:
|
| 461 |
metric = str(sort_key)
|
| 462 |
requested_window = (
|
|
@@ -504,19 +695,26 @@ async def hf_user_likes(
|
|
| 504 |
return (0, -metric_value, idx)
|
| 505 |
|
| 506 |
ranked_shortlist = sorted(shortlist, key=_ranking_key)
|
| 507 |
-
selected_pairs = ranked_shortlist[:
|
| 508 |
ranking_complete = (
|
| 509 |
exact_count
|
| 510 |
and shortlist_size >= matched
|
| 511 |
and (len(candidates) <= enrich_budget)
|
| 512 |
)
|
| 513 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
popularity_present = sum(
|
| 515 |
(1 for _, row in selected_pairs if row.get("repo_likes") is not None)
|
| 516 |
)
|
| 517 |
sample_complete = (
|
| 518 |
exact_count
|
| 519 |
-
and
|
| 520 |
and (sort_key == "liked_at" or ranking_complete)
|
| 521 |
and (not count_only or matched == 0)
|
| 522 |
)
|
|
@@ -563,7 +761,7 @@ async def hf_repo_likers(
|
|
| 563 |
ctx: HelperRuntimeContext,
|
| 564 |
repo_id: str,
|
| 565 |
repo_type: str,
|
| 566 |
-
|
| 567 |
count_only: bool = False,
|
| 568 |
pro_only: bool | None = None,
|
| 569 |
where: dict[str, Any] | None = None,
|
|
@@ -585,9 +783,9 @@ async def hf_repo_likers(
|
|
| 585 |
error=f"Unsupported repo_type '{repo_type}'",
|
| 586 |
repo_id=rid,
|
| 587 |
)
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
default_limit_used =
|
| 591 |
has_where = isinstance(where, dict) and bool(where)
|
| 592 |
endpoint = f"/api/{rt}s/{rid}/likers"
|
| 593 |
resp = ctx._host_raw_call(endpoint)
|
|
@@ -600,7 +798,18 @@ async def hf_repo_likers(
|
|
| 600 |
repo_type=rt,
|
| 601 |
)
|
| 602 |
payload = resp.get("data") if isinstance(resp.get("data"), list) else []
|
| 603 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 604 |
normalized: list[dict[str, Any]] = []
|
| 605 |
for row in payload:
|
| 606 |
if not isinstance(row, dict):
|
|
@@ -614,37 +823,37 @@ async def hf_repo_likers(
|
|
| 614 |
"type": row.get("type")
|
| 615 |
if isinstance(row.get("type"), str) and row.get("type")
|
| 616 |
else "user",
|
| 617 |
-
"
|
| 618 |
}
|
| 619 |
-
if pro_only is True and item.get("
|
| 620 |
continue
|
| 621 |
-
if pro_only is False and item.get("
|
| 622 |
continue
|
| 623 |
if not ctx._item_matches_where(item, normalized_where):
|
| 624 |
continue
|
| 625 |
normalized.append(item)
|
| 626 |
if count_only:
|
| 627 |
-
|
| 628 |
-
elif
|
| 629 |
-
|
| 630 |
else:
|
| 631 |
try:
|
| 632 |
-
|
| 633 |
except Exception:
|
| 634 |
-
|
| 635 |
limit_plan = {
|
| 636 |
-
"
|
| 637 |
-
"
|
| 638 |
"default_limit_used": default_limit_used,
|
| 639 |
"hard_cap_applied": False,
|
| 640 |
}
|
| 641 |
matched = len(normalized)
|
| 642 |
-
items = [] if count_only else normalized[:
|
| 643 |
-
|
| 644 |
truncated_by = ctx._derive_truncated_by(
|
| 645 |
-
hard_cap=False,
|
| 646 |
)
|
| 647 |
-
sample_complete = matched <=
|
| 648 |
truncated = truncated_by != "none"
|
| 649 |
more_available = ctx._derive_more_available(
|
| 650 |
sample_complete=sample_complete,
|
|
@@ -652,7 +861,16 @@ async def hf_repo_likers(
|
|
| 652 |
returned=len(items),
|
| 653 |
total=matched,
|
| 654 |
)
|
| 655 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 656 |
meta = ctx._build_exhaustive_meta(
|
| 657 |
base_meta={
|
| 658 |
"scanned": len(payload),
|
|
@@ -683,7 +901,11 @@ async def hf_repo_likers(
|
|
| 683 |
|
| 684 |
|
| 685 |
async def hf_repo_discussions(
|
| 686 |
-
ctx: HelperRuntimeContext,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 687 |
) -> dict[str, Any]:
|
| 688 |
start_calls = ctx.call_count["n"]
|
| 689 |
rt = ctx._canonical_repo_type(repo_type)
|
|
@@ -718,17 +940,21 @@ async def hf_repo_discussions(
|
|
| 718 |
items.append(
|
| 719 |
{
|
| 720 |
"num": num,
|
| 721 |
-
"
|
| 722 |
-
"
|
| 723 |
-
"id": num,
|
| 724 |
"title": getattr(d, "title", None),
|
| 725 |
"author": getattr(d, "author", None),
|
| 726 |
-
"
|
| 727 |
if getattr(d, "created_at", None) is not None
|
| 728 |
else None,
|
| 729 |
"status": getattr(d, "status", None),
|
|
|
|
| 730 |
}
|
| 731 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 732 |
return ctx._helper_success(
|
| 733 |
start_calls=start_calls,
|
| 734 |
source=endpoint,
|
|
@@ -742,7 +968,11 @@ async def hf_repo_discussions(
|
|
| 742 |
|
| 743 |
|
| 744 |
async def hf_repo_discussion_details(
|
| 745 |
-
ctx: HelperRuntimeContext,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 746 |
) -> dict[str, Any]:
|
| 747 |
start_calls = ctx.call_count["n"]
|
| 748 |
rt = ctx._canonical_repo_type(repo_type)
|
|
@@ -779,7 +1009,7 @@ async def hf_repo_discussion_details(
|
|
| 779 |
comment_events.append(
|
| 780 |
{
|
| 781 |
"author": getattr(event, "author", None),
|
| 782 |
-
"
|
| 783 |
"text": getattr(event, "content", None),
|
| 784 |
"rendered": getattr(event, "rendered", None),
|
| 785 |
}
|
|
@@ -787,31 +1017,22 @@ async def hf_repo_discussion_details(
|
|
| 787 |
latest_comment: dict[str, Any] | None = None
|
| 788 |
if comment_events:
|
| 789 |
latest_comment = max(
|
| 790 |
-
comment_events, key=lambda row: str(row.get("
|
| 791 |
)
|
| 792 |
item: dict[str, Any] = {
|
| 793 |
"num": num,
|
| 794 |
-
"number": num,
|
| 795 |
-
"discussionNum": num,
|
| 796 |
-
"id": num,
|
| 797 |
"repo_id": rid,
|
| 798 |
"repo_type": rt,
|
| 799 |
"title": getattr(detail, "title", None),
|
| 800 |
"author": getattr(detail, "author", None),
|
| 801 |
-
"
|
| 802 |
"status": getattr(detail, "status", None),
|
| 803 |
"url": getattr(detail, "url", None),
|
| 804 |
-
"
|
| 805 |
-
"latestCommentAuthor": latest_comment.get("author") if latest_comment else None,
|
| 806 |
-
"latestCommentCreatedAt": latest_comment.get("createdAt")
|
| 807 |
-
if latest_comment
|
| 808 |
-
else None,
|
| 809 |
-
"latestCommentText": latest_comment.get("text") if latest_comment else None,
|
| 810 |
-
"latestCommentHtml": latest_comment.get("rendered") if latest_comment else None,
|
| 811 |
"latest_comment_author": latest_comment.get("author")
|
| 812 |
if latest_comment
|
| 813 |
else None,
|
| 814 |
-
"latest_comment_created_at": latest_comment.get("
|
| 815 |
if latest_comment
|
| 816 |
else None,
|
| 817 |
"latest_comment_text": latest_comment.get("text") if latest_comment else None,
|
|
@@ -819,13 +1040,17 @@ async def hf_repo_discussion_details(
|
|
| 819 |
if latest_comment
|
| 820 |
else None,
|
| 821 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 822 |
return ctx._helper_success(
|
| 823 |
start_calls=start_calls,
|
| 824 |
source=endpoint,
|
| 825 |
-
items=
|
| 826 |
scanned=len(comment_events),
|
| 827 |
matched=1,
|
| 828 |
-
returned=
|
| 829 |
truncated=False,
|
| 830 |
total_comments=len(comment_events),
|
| 831 |
)
|
|
@@ -926,7 +1151,10 @@ async def hf_repo_details(
|
|
| 926 |
failures=failures,
|
| 927 |
repo_type=repo_type,
|
| 928 |
)
|
| 929 |
-
|
|
|
|
|
|
|
|
|
|
| 930 |
return ctx._helper_success(
|
| 931 |
start_calls=start_calls,
|
| 932 |
source="/api/repos",
|
|
@@ -947,9 +1175,9 @@ async def hf_trending(
|
|
| 947 |
fields: list[str] | None = None,
|
| 948 |
) -> dict[str, Any]:
|
| 949 |
start_calls = ctx.call_count["n"]
|
| 950 |
-
|
| 951 |
-
|
| 952 |
-
"hf_trending", "
|
| 953 |
)
|
| 954 |
raw_type = str(repo_type or "model").strip().lower()
|
| 955 |
if raw_type == "all":
|
|
@@ -962,7 +1190,7 @@ async def hf_trending(
|
|
| 962 |
source="/api/trending",
|
| 963 |
error=f"Unsupported repo_type '{repo_type}'",
|
| 964 |
)
|
| 965 |
-
lim = ctx._clamp_int(limit, default=
|
| 966 |
resp = ctx._host_raw_call(
|
| 967 |
"/api/trending", params={"type": requested_type, "limit": lim}
|
| 968 |
)
|
|
@@ -985,9 +1213,23 @@ async def hf_trending(
|
|
| 985 |
continue
|
| 986 |
repo = row.get("repoData") if isinstance(row.get("repoData"), dict) else {}
|
| 987 |
items.append(ctx._normalize_trending_row(repo, default_row_type, rank=idx))
|
| 988 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 989 |
matched = len(items)
|
| 990 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 991 |
return ctx._helper_success(
|
| 992 |
start_calls=start_calls,
|
| 993 |
source="/api/trending",
|
|
@@ -1011,11 +1253,11 @@ async def hf_daily_papers(
|
|
| 1011 |
fields: list[str] | None = None,
|
| 1012 |
) -> dict[str, Any]:
|
| 1013 |
start_calls = ctx.call_count["n"]
|
| 1014 |
-
|
| 1015 |
-
|
| 1016 |
-
"hf_daily_papers", "
|
| 1017 |
)
|
| 1018 |
-
lim = ctx._clamp_int(limit, default=
|
| 1019 |
resp = ctx._host_raw_call("/api/daily_papers", params={"limit": lim})
|
| 1020 |
if not resp.get("ok"):
|
| 1021 |
return ctx._helper_error(
|
|
@@ -1029,9 +1271,25 @@ async def hf_daily_papers(
|
|
| 1029 |
if not isinstance(row, dict):
|
| 1030 |
continue
|
| 1031 |
items.append(ctx._normalize_daily_paper_row(row, rank=idx))
|
| 1032 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1033 |
matched = len(items)
|
| 1034 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1035 |
return ctx._helper_success(
|
| 1036 |
start_calls=start_calls,
|
| 1037 |
source="/api/daily_papers",
|
|
@@ -1046,6 +1304,9 @@ async def hf_daily_papers(
|
|
| 1046 |
|
| 1047 |
def register_repo_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[..., Any]]:
|
| 1048 |
return {
|
|
|
|
|
|
|
|
|
|
| 1049 |
"hf_repo_search": partial(hf_repo_search, ctx),
|
| 1050 |
"hf_user_likes": partial(hf_user_likes, ctx),
|
| 1051 |
"hf_repo_likers": partial(hf_repo_likers, ctx),
|
|
|
|
| 5 |
from typing import Any, Callable
|
| 6 |
from huggingface_hub import HfApi
|
| 7 |
from ..context_types import HelperRuntimeContext
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from ..constants import (
|
| 9 |
+
ACTOR_CANONICAL_FIELDS,
|
| 10 |
+
DAILY_PAPER_CANONICAL_FIELDS,
|
| 11 |
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 12 |
LIKES_ENRICHMENT_MAX_REPOS,
|
| 13 |
LIKES_RANKING_WINDOW_DEFAULT,
|
| 14 |
LIKES_SCAN_LIMIT_CAP,
|
| 15 |
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 16 |
+
REPO_CANONICAL_FIELDS,
|
| 17 |
SELECTIVE_ENDPOINT_RETURN_HARD_CAP,
|
| 18 |
TRENDING_ENDPOINT_MAX_LIMIT,
|
| 19 |
+
USER_LIKES_CANONICAL_FIELDS,
|
| 20 |
)
|
| 21 |
from ..registry import (
|
| 22 |
REPO_SEARCH_ALLOWED_EXPAND,
|
|
|
|
| 40 |
elif isinstance(raw_expand, (list, tuple, set)):
|
| 41 |
requested_values = list(raw_expand)
|
| 42 |
else:
|
| 43 |
+
return (None, [], "expand must be a string or a list of strings")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
cleaned: list[str] = []
|
| 46 |
for value in requested_values:
|
|
|
|
| 54 |
return (kept or None, dropped, None)
|
| 55 |
|
| 56 |
|
| 57 |
+
def _resolve_repo_search_types(
    ctx: HelperRuntimeContext,
    *,
    repo_type: str | None,
    repo_types: list[str] | None,
    default_repo_type: str = "model",
) -> tuple[list[str] | None, str | None]:
    """Resolve the repo-type selection of a search into canonical type names.

    Exactly one of ``repo_type`` / ``repo_types`` may be supplied. When neither
    carries a value, ``[default_repo_type]`` is returned.

    Args:
        ctx: Runtime context; only ``_canonical_repo_type`` and
            ``_coerce_str_list`` are used here.
        repo_type: A single repo type name, or ``None``.
        repo_types: Several repo type names, or ``None``.
        default_repo_type: Type used when ``repo_type`` is empty and
            ``repo_types`` is ``None``.

    Returns:
        ``(types, None)`` on success, where ``types`` is a de-duplicated list
        that keeps first-seen order, or ``(None, message)`` when the request
        is invalid.
    """
    if repo_type is not None and repo_types is not None:
        return (None, "Pass either repo_type or repo_types, not both")

    # Single source of truth for the types this search supports.
    supported = ("model", "dataset", "space")

    if repo_types is None:
        raw_type = str(repo_type or "").strip()
        if not raw_type:
            return ([default_repo_type], None)
        canonical = ctx._canonical_repo_type(raw_type, default="")
        if canonical not in supported:
            return (None, f"Unsupported repo_type '{repo_type}'")
        return ([canonical], None)

    # The filter normalizer in this module treats ValueError from
    # _coerce_str_list as "wrong input shape"; report it the same way here
    # instead of letting the exception escape to the caller.
    try:
        raw_types = ctx._coerce_str_list(repo_types)
    except ValueError:
        return (None, "repo_types must be a string or a list of strings")
    if not raw_types:
        return (None, "repo_types must not be empty")

    requested_repo_types: list[str] = []
    for raw in raw_types:
        canonical = ctx._canonical_repo_type(raw, default="")
        if canonical not in supported:
            return (None, f"Unsupported repo_type '{raw}'")
        # Aliases of the same type collapse to a single entry.
        if canonical not in requested_repo_types:
            requested_repo_types.append(canonical)
    return (requested_repo_types, None)
|
| 88 |
+
|
| 89 |
|
| 90 |
+
def _clean_repo_search_text(value: str | None) -> str | None:
    """Return *value* stripped of surrounding whitespace, or ``None`` if blank."""
    text = str(value).strip() if value else ""
    return text if text else None
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
+
def _normalize_repo_search_filter(
    ctx: HelperRuntimeContext, value: str | list[str] | None
) -> tuple[list[str] | None, str | None]:
    """Coerce a search ``filter`` argument into a list of strings.

    Returns ``(filters, None)`` on success, with ``filters`` set to ``None``
    when no value was given or the coerced list is empty, and
    ``(None, message)`` when ``ctx._coerce_str_list`` rejects the value with
    ``ValueError``.
    """
    if value is None:
        return (None, None)
    try:
        tags = ctx._coerce_str_list(value)
    except ValueError:
        return (None, "filter must be a string or a list of strings")
    if not tags:
        return (None, None)
    return (tags, None)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def _build_repo_search_extra_args(
    repo_type: str, **candidate_args: Any
) -> tuple[dict[str, Any], list[str], str | None]:
    """Validate and normalize the type-specific keyword arguments of a search.

    ``None`` values are dropped. ``card_data``/``cardData`` is stored under
    ``"cardData"`` and, like ``fetch_config`` and ``linked``, is only kept
    (as ``True``) when truthy. Every other argument is passed through as-is.

    Returns:
        ``(args, dropped_expand, None)`` on success, where ``dropped_expand``
        is whatever ``_sanitize_repo_expand_values`` reported as dropped, or
        ``({}, [], message)`` when an argument is not allowed for
        ``repo_type`` or the ``expand`` value is rejected.
    """
    search_args: dict[str, Any] = {}
    for name, raw in candidate_args.items():
        if raw is None:
            continue
        if name in ("card_data", "cardData"):
            target = "cardData"
        elif name in ("fetch_config", "linked"):
            target = name
        else:
            search_args[name] = raw
            continue
        # Flag-style arguments are only recorded when switched on.
        if raw:
            search_args[target] = True

    allowed_extra = REPO_SEARCH_EXTRA_ARGS.get(repo_type, set())
    unsupported = sorted(
        name for name in map(str, search_args) if name not in allowed_extra
    )
    if unsupported:
        message = f"Unsupported search args for repo_type='{repo_type}': {unsupported}. Allowed args: {sorted(allowed_extra)}"
        return ({}, [], message)

    ignored: list[str] = []
    if "expand" in search_args:
        kept, ignored, problem = _sanitize_repo_expand_values(
            repo_type, search_args["expand"]
        )
        if problem:
            return ({}, [], problem)
        if kept is None:
            search_args.pop("expand", None)
        else:
            search_args["expand"] = kept

    # With no explicit detail selector, fall back to the per-type default expand.
    detail_keys = ("expand", "full", "cardData", "fetch_config")
    if all(key not in search_args for key in detail_keys):
        search_args["expand"] = list(REPO_SEARCH_DEFAULT_EXPAND[repo_type])

    return (search_args, ignored, None)
|
| 151 |
|
| 152 |
|
| 153 |
def _normalize_user_likes_sort(sort: str | None) -> tuple[str | None, str | None]:
    """Validate the ``sort`` argument of the user-likes helper.

    A missing or blank value falls back to ``"liked_at"``. Returns
    ``(sort_key, None)`` for an accepted key and ``(None, message)`` otherwise.
    """
    sort_key = str(sort).strip() if sort else ""
    if not sort_key:
        sort_key = "liked_at"
    if sort_key in ("liked_at", "repo_likes", "repo_downloads"):
        return (sort_key, None)
    return (None, "sort must be one of liked_at, repo_likes, repo_downloads")
|
| 158 |
|
| 159 |
|
| 160 |
+
async def _run_repo_search(
|
| 161 |
ctx: HelperRuntimeContext,
|
| 162 |
+
*,
|
| 163 |
+
helper_name: str,
|
| 164 |
+
requested_repo_types: list[str],
|
| 165 |
+
search: str | None,
|
| 166 |
+
filter: str | list[str] | None,
|
| 167 |
+
author: str | None,
|
| 168 |
+
sort: str | None,
|
| 169 |
+
limit: int,
|
| 170 |
+
fields: list[str] | None,
|
| 171 |
+
post_filter: dict[str, Any] | None,
|
| 172 |
+
extra_args_by_type: dict[str, dict[str, Any]] | None = None,
|
| 173 |
) -> dict[str, Any]:
|
| 174 |
start_calls = ctx.call_count["n"]
|
| 175 |
+
default_limit = ctx._policy_int(helper_name, "default_limit", 20)
|
| 176 |
+
max_limit = ctx._policy_int(
|
| 177 |
+
helper_name, "max_limit", SELECTIVE_ENDPOINT_RETURN_HARD_CAP
|
| 178 |
)
|
| 179 |
+
filter_list, filter_error = _normalize_repo_search_filter(ctx, filter)
|
| 180 |
+
if filter_error:
|
| 181 |
return ctx._helper_error(
|
| 182 |
start_calls=start_calls,
|
| 183 |
source="/api/repos",
|
| 184 |
+
error=filter_error,
|
| 185 |
)
|
| 186 |
+
|
| 187 |
+
term = _clean_repo_search_text(search)
|
| 188 |
+
author_clean = _clean_repo_search_text(author)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
requested_limit = limit
|
| 190 |
+
applied_limit = ctx._clamp_int(
|
| 191 |
+
limit,
|
| 192 |
+
default=default_limit,
|
| 193 |
+
minimum=1,
|
| 194 |
+
maximum=max_limit,
|
| 195 |
+
)
|
| 196 |
limit_meta = ctx._derive_limit_metadata(
|
| 197 |
+
requested_limit=requested_limit,
|
| 198 |
+
applied_limit=applied_limit,
|
| 199 |
+
default_limit_used=limit == default_limit,
|
| 200 |
)
|
| 201 |
hard_cap_applied = bool(limit_meta.get("hard_cap_applied"))
|
| 202 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
sort_keys: dict[str, str | None] = {}
|
| 204 |
+
for repo_type in requested_repo_types:
|
| 205 |
+
sort_key, sort_error = ctx._normalize_repo_sort_key(repo_type, sort)
|
| 206 |
if sort_error:
|
| 207 |
return ctx._helper_error(
|
| 208 |
+
start_calls=start_calls,
|
| 209 |
+
source=f"/api/{repo_type}s",
|
| 210 |
+
error=sort_error,
|
| 211 |
)
|
| 212 |
+
sort_keys[repo_type] = sort_key
|
| 213 |
+
|
| 214 |
all_items: list[dict[str, Any]] = []
|
| 215 |
scanned = 0
|
| 216 |
source_endpoints: list[str] = []
|
| 217 |
limit_boundary_hit = False
|
| 218 |
ignored_expand: dict[str, list[str]] = {}
|
|
|
|
|
|
|
| 219 |
api = ctx._get_hf_api_client()
|
| 220 |
+
|
| 221 |
+
for repo_type in requested_repo_types:
|
| 222 |
+
endpoint = f"/api/{repo_type}s"
|
| 223 |
source_endpoints.append(endpoint)
|
| 224 |
+
raw_extra_args = dict((extra_args_by_type or {}).get(repo_type, {}))
|
| 225 |
+
extra_args, dropped_expand, extra_error = _build_repo_search_extra_args(
|
| 226 |
+
repo_type,
|
| 227 |
+
**raw_extra_args,
|
|
|
|
| 228 |
)
|
| 229 |
+
if extra_error:
|
| 230 |
return ctx._helper_error(
|
| 231 |
start_calls=start_calls,
|
| 232 |
source=endpoint,
|
| 233 |
+
error=extra_error,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
)
|
| 235 |
+
if dropped_expand:
|
| 236 |
+
ignored_expand[repo_type] = dropped_expand
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
try:
|
| 238 |
payload = ctx._host_hf_call(
|
| 239 |
endpoint,
|
| 240 |
+
lambda repo_type=repo_type, extra_args=extra_args: ctx._repo_list_call(
|
| 241 |
api,
|
| 242 |
+
repo_type,
|
| 243 |
+
search=term,
|
| 244 |
+
author=author_clean,
|
| 245 |
+
filter=filter_list,
|
| 246 |
+
sort=sort_keys[repo_type],
|
| 247 |
+
limit=applied_limit,
|
| 248 |
**extra_args,
|
| 249 |
),
|
| 250 |
)
|
| 251 |
except Exception as e:
|
| 252 |
return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
|
| 253 |
scanned += len(payload)
|
| 254 |
+
if len(payload) >= applied_limit:
|
| 255 |
limit_boundary_hit = True
|
| 256 |
all_items.extend(
|
| 257 |
+
ctx._normalize_repo_search_row(row, repo_type)
|
| 258 |
+
for row in payload[:applied_limit]
|
| 259 |
+
)
|
| 260 |
+
|
| 261 |
+
try:
|
| 262 |
+
all_items = ctx._apply_where(
|
| 263 |
+
all_items, post_filter, allowed_fields=REPO_CANONICAL_FIELDS
|
| 264 |
+
)
|
| 265 |
+
except ValueError as exc:
|
| 266 |
+
return ctx._helper_error(
|
| 267 |
+
start_calls=start_calls,
|
| 268 |
+
source="/api/repos",
|
| 269 |
+
error=exc,
|
| 270 |
)
|
|
|
|
| 271 |
combined_sort_key = next(iter(sort_keys.values()), None)
|
| 272 |
all_items = ctx._sort_repo_rows(all_items, combined_sort_key)
|
| 273 |
matched = len(all_items)
|
| 274 |
+
try:
|
| 275 |
+
all_items = ctx._project_repo_items(all_items[:applied_limit], fields)
|
| 276 |
+
except ValueError as exc:
|
| 277 |
+
return ctx._helper_error(
|
| 278 |
+
start_calls=start_calls,
|
| 279 |
+
source="/api/repos",
|
| 280 |
+
error=exc,
|
| 281 |
+
)
|
| 282 |
+
|
| 283 |
more_available: bool | str = False
|
| 284 |
truncated = False
|
| 285 |
truncated_by = "none"
|
| 286 |
next_request_hint: str | None = None
|
| 287 |
+
if hard_cap_applied and scanned >= applied_limit:
|
| 288 |
truncated = True
|
| 289 |
truncated_by = "hard_cap"
|
| 290 |
more_available = "unknown"
|
| 291 |
+
next_request_hint = f"Increase limit above {applied_limit} to improve coverage"
|
| 292 |
elif limit_boundary_hit:
|
| 293 |
more_available = "unknown"
|
| 294 |
next_request_hint = (
|
| 295 |
+
f"Increase limit above {applied_limit} to check whether more rows exist"
|
| 296 |
)
|
| 297 |
+
|
| 298 |
return ctx._helper_success(
|
| 299 |
start_calls=start_calls,
|
| 300 |
source=",".join(source_endpoints),
|
| 301 |
items=all_items,
|
| 302 |
+
helper=helper_name,
|
| 303 |
+
search=term,
|
| 304 |
repo_types=requested_repo_types,
|
| 305 |
+
filter=filter_list,
|
| 306 |
sort=combined_sort_key,
|
| 307 |
author=author_clean,
|
| 308 |
+
limit=applied_limit,
|
| 309 |
+
post_filter=post_filter if isinstance(post_filter, dict) and post_filter else None,
|
| 310 |
scanned=scanned,
|
| 311 |
matched=matched,
|
| 312 |
returned=len(all_items),
|
|
|
|
| 316 |
limit_boundary_hit=limit_boundary_hit,
|
| 317 |
next_request_hint=next_request_hint,
|
| 318 |
ignored_expand=ignored_expand or None,
|
|
|
|
| 319 |
**limit_meta,
|
| 320 |
)
|
| 321 |
|
| 322 |
|
| 323 |
+
async def hf_models_search(
    ctx: HelperRuntimeContext,
    search: str | None = None,
    filter: str | list[str] | None = None,
    author: str | None = None,
    apps: str | list[str] | None = None,
    gated: bool | None = None,
    inference: str | None = None,
    inference_provider: str | list[str] | None = None,
    model_name: str | None = None,
    trained_dataset: str | list[str] | None = None,
    pipeline_tag: str | None = None,
    emissions_thresholds: tuple[float, float] | None = None,
    sort: str | None = None,
    limit: int = 20,
    expand: list[str] | None = None,
    full: bool | None = None,
    card_data: bool = False,
    fetch_config: bool = False,
    fields: list[str] | None = None,
    post_filter: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Search model repos by delegating to ``_run_repo_search``.

    The shared arguments (``search``, ``filter``, ``author``, ``sort``,
    ``limit``, ``fields``, ``post_filter``) are forwarded directly. The
    model-only arguments are bundled under the ``"model"`` key of
    ``extra_args_by_type``.
    """
    model_only_args: dict[str, Any] = dict(
        apps=apps,
        gated=gated,
        inference=inference,
        inference_provider=inference_provider,
        model_name=model_name,
        trained_dataset=trained_dataset,
        pipeline_tag=pipeline_tag,
        emissions_thresholds=emissions_thresholds,
        expand=expand,
        full=full,
        card_data=card_data,
        fetch_config=fetch_config,
    )
    return await _run_repo_search(
        ctx,
        helper_name="hf_models_search",
        requested_repo_types=["model"],
        search=search,
        filter=filter,
        author=author,
        sort=sort,
        limit=limit,
        fields=fields,
        post_filter=post_filter,
        extra_args_by_type={"model": model_only_args},
    )
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
async def hf_datasets_search(
    ctx: HelperRuntimeContext,
    search: str | None = None,
    filter: str | list[str] | None = None,
    author: str | None = None,
    benchmark: str | bool | None = None,
    dataset_name: str | None = None,
    gated: bool | None = None,
    language_creators: str | list[str] | None = None,
    language: str | list[str] | None = None,
    multilinguality: str | list[str] | None = None,
    size_categories: str | list[str] | None = None,
    task_categories: str | list[str] | None = None,
    task_ids: str | list[str] | None = None,
    sort: str | None = None,
    limit: int = 20,
    expand: list[str] | None = None,
    full: bool | None = None,
    fields: list[str] | None = None,
    post_filter: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Search dataset repos by delegating to ``_run_repo_search``.

    The shared arguments (``search``, ``filter``, ``author``, ``sort``,
    ``limit``, ``fields``, ``post_filter``) are forwarded directly. The
    dataset-only arguments are bundled under the ``"dataset"`` key of
    ``extra_args_by_type``.
    """
    dataset_only_args: dict[str, Any] = dict(
        benchmark=benchmark,
        dataset_name=dataset_name,
        gated=gated,
        language_creators=language_creators,
        language=language,
        multilinguality=multilinguality,
        size_categories=size_categories,
        task_categories=task_categories,
        task_ids=task_ids,
        expand=expand,
        full=full,
    )
    return await _run_repo_search(
        ctx,
        helper_name="hf_datasets_search",
        requested_repo_types=["dataset"],
        search=search,
        filter=filter,
        author=author,
        sort=sort,
        limit=limit,
        fields=fields,
        post_filter=post_filter,
        extra_args_by_type={"dataset": dataset_only_args},
    )
|
| 423 |
+
|
| 424 |
+
|
| 425 |
+
async def hf_spaces_search(
    ctx: HelperRuntimeContext,
    search: str | None = None,
    filter: str | list[str] | None = None,
    author: str | None = None,
    datasets: str | list[str] | None = None,
    models: str | list[str] | None = None,
    linked: bool = False,
    sort: str | None = None,
    limit: int = 20,
    expand: list[str] | None = None,
    full: bool | None = None,
    fields: list[str] | None = None,
    post_filter: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Search Space repos by delegating to ``_run_repo_search``.

    The shared arguments (``search``, ``filter``, ``author``, ``sort``,
    ``limit``, ``fields``, ``post_filter``) are forwarded directly. The
    space-only arguments are bundled under the ``"space"`` key of
    ``extra_args_by_type``.
    """
    space_only_args: dict[str, Any] = dict(
        datasets=datasets,
        models=models,
        linked=linked,
        expand=expand,
        full=full,
    )
    return await _run_repo_search(
        ctx,
        helper_name="hf_spaces_search",
        requested_repo_types=["space"],
        search=search,
        filter=filter,
        author=author,
        sort=sort,
        limit=limit,
        fields=fields,
        post_filter=post_filter,
        extra_args_by_type={"space": space_only_args},
    )
|
| 461 |
+
|
| 462 |
+
|
| 463 |
+
async def hf_repo_search(
    ctx: HelperRuntimeContext,
    search: str | None = None,
    repo_type: str | None = None,
    repo_types: list[str] | None = None,
    filter: str | list[str] | None = None,
    author: str | None = None,
    sort: str | None = None,
    limit: int = 20,
    fields: list[str] | None = None,
    post_filter: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Search one or more repo types through the shared search runner.

    The ``repo_type`` / ``repo_types`` pair is resolved with
    ``_resolve_repo_search_types``. A resolution error, or an empty
    resolution, is returned as a helper error with source ``"/api/repos"``.
    Otherwise the call is handed to ``_run_repo_search`` without any
    type-specific extra arguments.
    """
    calls_before = ctx.call_count["n"]
    resolved_types, problem = _resolve_repo_search_types(
        ctx,
        repo_type=repo_type,
        repo_types=repo_types,
    )
    # A resolver message wins; an empty resolution gets the generic message.
    failure = problem
    if not failure and not resolved_types:
        failure = "repo_type or repo_types is required"
    if failure:
        return ctx._helper_error(
            start_calls=calls_before,
            source="/api/repos",
            error=failure,
        )
    return await _run_repo_search(
        ctx,
        helper_name="hf_repo_search",
        requested_repo_types=resolved_types,
        search=search,
        filter=filter,
        author=author,
        sort=sort,
        limit=limit,
        fields=fields,
        post_filter=post_filter,
    )
|
| 505 |
+
|
| 506 |
+
|
| 507 |
async def hf_user_likes(
|
| 508 |
ctx: HelperRuntimeContext,
|
| 509 |
username: str | None = None,
|
| 510 |
repo_types: list[str] | None = None,
|
| 511 |
+
limit: int | None = None,
|
| 512 |
scan_limit: int | None = None,
|
| 513 |
count_only: bool = False,
|
| 514 |
where: dict[str, Any] | None = None,
|
|
|
|
| 517 |
ranking_window: int | None = None,
|
| 518 |
) -> dict[str, Any]:
|
| 519 |
start_calls = ctx.call_count["n"]
|
| 520 |
+
default_limit = ctx._policy_int("hf_user_likes", "default_limit", 100)
|
| 521 |
scan_cap = ctx._policy_int("hf_user_likes", "scan_max", LIKES_SCAN_LIMIT_CAP)
|
| 522 |
ranking_default = ctx._policy_int(
|
| 523 |
"hf_user_likes", "ranking_default", LIKES_RANKING_WINDOW_DEFAULT
|
|
|
|
| 552 |
error="sort must be one of liked_at, repo_likes, repo_downloads",
|
| 553 |
)
|
| 554 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 555 |
+
limit=limit,
|
| 556 |
count_only=count_only,
|
| 557 |
+
default_limit=default_limit,
|
| 558 |
+
max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 559 |
scan_limit=scan_limit,
|
| 560 |
scan_cap=scan_cap,
|
| 561 |
)
|
| 562 |
+
applied_limit = int(limit_plan["applied_limit"])
|
| 563 |
scan_lim = int(limit_plan["applied_scan_limit"])
|
| 564 |
+
try:
|
| 565 |
+
normalized_where = ctx._normalize_where(
|
| 566 |
+
where, allowed_fields=USER_LIKES_CANONICAL_FIELDS
|
| 567 |
+
)
|
| 568 |
+
except ValueError as exc:
|
| 569 |
+
return ctx._helper_error(
|
| 570 |
+
start_calls=start_calls,
|
| 571 |
+
source=f"/api/users/{resolved_username}/likes",
|
| 572 |
+
error=exc,
|
| 573 |
+
)
|
| 574 |
allowed_repo_types: set[str] | None = None
|
| 575 |
try:
|
| 576 |
raw_repo_types: list[str] = (
|
|
|
|
| 647 |
selected_pairs = []
|
| 648 |
ranking_complete = False if matched > 0 else exact_count
|
| 649 |
elif sort_key == "liked_at":
|
| 650 |
+
selected_pairs = matched_rows[:applied_limit]
|
| 651 |
else:
|
| 652 |
metric = str(sort_key)
|
| 653 |
requested_window = (
|
|
|
|
| 695 |
return (0, -metric_value, idx)
|
| 696 |
|
| 697 |
ranked_shortlist = sorted(shortlist, key=_ranking_key)
|
| 698 |
+
selected_pairs = ranked_shortlist[:applied_limit]
|
| 699 |
ranking_complete = (
|
| 700 |
exact_count
|
| 701 |
and shortlist_size >= matched
|
| 702 |
and (len(candidates) <= enrich_budget)
|
| 703 |
)
|
| 704 |
+
try:
|
| 705 |
+
items = ctx._project_user_like_items([row for _, row in selected_pairs], fields)
|
| 706 |
+
except ValueError as exc:
|
| 707 |
+
return ctx._helper_error(
|
| 708 |
+
start_calls=start_calls,
|
| 709 |
+
source=endpoint,
|
| 710 |
+
error=exc,
|
| 711 |
+
)
|
| 712 |
popularity_present = sum(
|
| 713 |
(1 for _, row in selected_pairs if row.get("repo_likes") is not None)
|
| 714 |
)
|
| 715 |
sample_complete = (
|
| 716 |
exact_count
|
| 717 |
+
and applied_limit >= matched
|
| 718 |
and (sort_key == "liked_at" or ranking_complete)
|
| 719 |
and (not count_only or matched == 0)
|
| 720 |
)
|
|
|
|
| 761 |
ctx: HelperRuntimeContext,
|
| 762 |
repo_id: str,
|
| 763 |
repo_type: str,
|
| 764 |
+
limit: int | None = None,
|
| 765 |
count_only: bool = False,
|
| 766 |
pro_only: bool | None = None,
|
| 767 |
where: dict[str, Any] | None = None,
|
|
|
|
| 783 |
error=f"Unsupported repo_type '{repo_type}'",
|
| 784 |
repo_id=rid,
|
| 785 |
)
|
| 786 |
+
default_limit = ctx._policy_int("hf_repo_likers", "default_limit", 1000)
|
| 787 |
+
requested_limit = limit
|
| 788 |
+
default_limit_used = requested_limit is None and (not count_only)
|
| 789 |
has_where = isinstance(where, dict) and bool(where)
|
| 790 |
endpoint = f"/api/{rt}s/{rid}/likers"
|
| 791 |
resp = ctx._host_raw_call(endpoint)
|
|
|
|
| 798 |
repo_type=rt,
|
| 799 |
)
|
| 800 |
payload = resp.get("data") if isinstance(resp.get("data"), list) else []
|
| 801 |
+
try:
|
| 802 |
+
normalized_where = ctx._normalize_where(
|
| 803 |
+
where, allowed_fields=ACTOR_CANONICAL_FIELDS
|
| 804 |
+
)
|
| 805 |
+
except ValueError as exc:
|
| 806 |
+
return ctx._helper_error(
|
| 807 |
+
start_calls=start_calls,
|
| 808 |
+
source=endpoint,
|
| 809 |
+
error=exc,
|
| 810 |
+
repo_id=rid,
|
| 811 |
+
repo_type=rt,
|
| 812 |
+
)
|
| 813 |
normalized: list[dict[str, Any]] = []
|
| 814 |
for row in payload:
|
| 815 |
if not isinstance(row, dict):
|
|
|
|
| 823 |
"type": row.get("type")
|
| 824 |
if isinstance(row.get("type"), str) and row.get("type")
|
| 825 |
else "user",
|
| 826 |
+
"is_pro": row.get("isPro"),
|
| 827 |
}
|
| 828 |
+
if pro_only is True and item.get("is_pro") is not True:
|
| 829 |
continue
|
| 830 |
+
if pro_only is False and item.get("is_pro") is True:
|
| 831 |
continue
|
| 832 |
if not ctx._item_matches_where(item, normalized_where):
|
| 833 |
continue
|
| 834 |
normalized.append(item)
|
| 835 |
if count_only:
|
| 836 |
+
applied_limit = 0
|
| 837 |
+
elif requested_limit is None:
|
| 838 |
+
applied_limit = default_limit
|
| 839 |
else:
|
| 840 |
try:
|
| 841 |
+
applied_limit = max(0, int(requested_limit))
|
| 842 |
except Exception:
|
| 843 |
+
applied_limit = default_limit
|
| 844 |
limit_plan = {
|
| 845 |
+
"requested_limit": requested_limit,
|
| 846 |
+
"applied_limit": applied_limit,
|
| 847 |
"default_limit_used": default_limit_used,
|
| 848 |
"hard_cap_applied": False,
|
| 849 |
}
|
| 850 |
matched = len(normalized)
|
| 851 |
+
items = [] if count_only else normalized[:applied_limit]
|
| 852 |
+
limit_hit = applied_limit > 0 and matched > applied_limit
|
| 853 |
truncated_by = ctx._derive_truncated_by(
|
| 854 |
+
hard_cap=False, limit_hit=limit_hit
|
| 855 |
)
|
| 856 |
+
sample_complete = matched <= applied_limit and (not count_only or matched == 0)
|
| 857 |
truncated = truncated_by != "none"
|
| 858 |
more_available = ctx._derive_more_available(
|
| 859 |
sample_complete=sample_complete,
|
|
|
|
| 861 |
returned=len(items),
|
| 862 |
total=matched,
|
| 863 |
)
|
| 864 |
+
try:
|
| 865 |
+
items = ctx._project_actor_items(items, fields)
|
| 866 |
+
except ValueError as exc:
|
| 867 |
+
return ctx._helper_error(
|
| 868 |
+
start_calls=start_calls,
|
| 869 |
+
source=endpoint,
|
| 870 |
+
error=exc,
|
| 871 |
+
repo_id=rid,
|
| 872 |
+
repo_type=rt,
|
| 873 |
+
)
|
| 874 |
meta = ctx._build_exhaustive_meta(
|
| 875 |
base_meta={
|
| 876 |
"scanned": len(payload),
|
|
|
|
| 901 |
|
| 902 |
|
| 903 |
async def hf_repo_discussions(
|
| 904 |
+
ctx: HelperRuntimeContext,
|
| 905 |
+
repo_type: str,
|
| 906 |
+
repo_id: str,
|
| 907 |
+
limit: int = 20,
|
| 908 |
+
fields: list[str] | None = None,
|
| 909 |
) -> dict[str, Any]:
|
| 910 |
start_calls = ctx.call_count["n"]
|
| 911 |
rt = ctx._canonical_repo_type(repo_type)
|
|
|
|
| 940 |
items.append(
|
| 941 |
{
|
| 942 |
"num": num,
|
| 943 |
+
"repo_id": rid,
|
| 944 |
+
"repo_type": rt,
|
|
|
|
| 945 |
"title": getattr(d, "title", None),
|
| 946 |
"author": getattr(d, "author", None),
|
| 947 |
+
"created_at": str(getattr(d, "created_at", None))
|
| 948 |
if getattr(d, "created_at", None) is not None
|
| 949 |
else None,
|
| 950 |
"status": getattr(d, "status", None),
|
| 951 |
+
"url": getattr(d, "url", None),
|
| 952 |
}
|
| 953 |
)
|
| 954 |
+
try:
|
| 955 |
+
items = ctx._project_discussion_items(items, fields)
|
| 956 |
+
except ValueError as exc:
|
| 957 |
+
return ctx._helper_error(start_calls=start_calls, source=endpoint, error=exc)
|
| 958 |
return ctx._helper_success(
|
| 959 |
start_calls=start_calls,
|
| 960 |
source=endpoint,
|
|
|
|
| 968 |
|
| 969 |
|
| 970 |
async def hf_repo_discussion_details(
|
| 971 |
+
ctx: HelperRuntimeContext,
|
| 972 |
+
repo_type: str,
|
| 973 |
+
repo_id: str,
|
| 974 |
+
discussion_num: int,
|
| 975 |
+
fields: list[str] | None = None,
|
| 976 |
) -> dict[str, Any]:
|
| 977 |
start_calls = ctx.call_count["n"]
|
| 978 |
rt = ctx._canonical_repo_type(repo_type)
|
|
|
|
| 1009 |
comment_events.append(
|
| 1010 |
{
|
| 1011 |
"author": getattr(event, "author", None),
|
| 1012 |
+
"created_at": ctx._dt_to_str(getattr(event, "created_at", None)),
|
| 1013 |
"text": getattr(event, "content", None),
|
| 1014 |
"rendered": getattr(event, "rendered", None),
|
| 1015 |
}
|
|
|
|
| 1017 |
latest_comment: dict[str, Any] | None = None
|
| 1018 |
if comment_events:
|
| 1019 |
latest_comment = max(
|
| 1020 |
+
comment_events, key=lambda row: str(row.get("created_at") or "")
|
| 1021 |
)
|
| 1022 |
item: dict[str, Any] = {
|
| 1023 |
"num": num,
|
|
|
|
|
|
|
|
|
|
| 1024 |
"repo_id": rid,
|
| 1025 |
"repo_type": rt,
|
| 1026 |
"title": getattr(detail, "title", None),
|
| 1027 |
"author": getattr(detail, "author", None),
|
| 1028 |
+
"created_at": ctx._dt_to_str(getattr(detail, "created_at", None)),
|
| 1029 |
"status": getattr(detail, "status", None),
|
| 1030 |
"url": getattr(detail, "url", None),
|
| 1031 |
+
"comment_count": len(comment_events),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1032 |
"latest_comment_author": latest_comment.get("author")
|
| 1033 |
if latest_comment
|
| 1034 |
else None,
|
| 1035 |
+
"latest_comment_created_at": latest_comment.get("created_at")
|
| 1036 |
if latest_comment
|
| 1037 |
else None,
|
| 1038 |
"latest_comment_text": latest_comment.get("text") if latest_comment else None,
|
|
|
|
| 1040 |
if latest_comment
|
| 1041 |
else None,
|
| 1042 |
}
|
| 1043 |
+
try:
|
| 1044 |
+
items = ctx._project_discussion_detail_items([item], fields)
|
| 1045 |
+
except ValueError as exc:
|
| 1046 |
+
return ctx._helper_error(start_calls=start_calls, source=endpoint, error=exc)
|
| 1047 |
return ctx._helper_success(
|
| 1048 |
start_calls=start_calls,
|
| 1049 |
source=endpoint,
|
| 1050 |
+
items=items,
|
| 1051 |
scanned=len(comment_events),
|
| 1052 |
matched=1,
|
| 1053 |
+
returned=len(items),
|
| 1054 |
truncated=False,
|
| 1055 |
total_comments=len(comment_events),
|
| 1056 |
)
|
|
|
|
| 1151 |
failures=failures,
|
| 1152 |
repo_type=repo_type,
|
| 1153 |
)
|
| 1154 |
+
try:
|
| 1155 |
+
items = ctx._project_repo_items(items, fields)
|
| 1156 |
+
except ValueError as exc:
|
| 1157 |
+
return ctx._helper_error(start_calls=start_calls, source="/api/repos", error=exc)
|
| 1158 |
return ctx._helper_success(
|
| 1159 |
start_calls=start_calls,
|
| 1160 |
source="/api/repos",
|
|
|
|
| 1175 |
fields: list[str] | None = None,
|
| 1176 |
) -> dict[str, Any]:
|
| 1177 |
start_calls = ctx.call_count["n"]
|
| 1178 |
+
default_limit = ctx._policy_int("hf_trending", "default_limit", 20)
|
| 1179 |
+
max_limit = ctx._policy_int(
|
| 1180 |
+
"hf_trending", "max_limit", TRENDING_ENDPOINT_MAX_LIMIT
|
| 1181 |
)
|
| 1182 |
raw_type = str(repo_type or "model").strip().lower()
|
| 1183 |
if raw_type == "all":
|
|
|
|
| 1190 |
source="/api/trending",
|
| 1191 |
error=f"Unsupported repo_type '{repo_type}'",
|
| 1192 |
)
|
| 1193 |
+
lim = ctx._clamp_int(limit, default=default_limit, minimum=1, maximum=max_limit)
|
| 1194 |
resp = ctx._host_raw_call(
|
| 1195 |
"/api/trending", params={"type": requested_type, "limit": lim}
|
| 1196 |
)
|
|
|
|
| 1213 |
continue
|
| 1214 |
repo = row.get("repoData") if isinstance(row.get("repoData"), dict) else {}
|
| 1215 |
items.append(ctx._normalize_trending_row(repo, default_row_type, rank=idx))
|
| 1216 |
+
try:
|
| 1217 |
+
items = ctx._apply_where(items, where, allowed_fields=REPO_CANONICAL_FIELDS)
|
| 1218 |
+
except ValueError as exc:
|
| 1219 |
+
return ctx._helper_error(
|
| 1220 |
+
start_calls=start_calls,
|
| 1221 |
+
source="/api/trending",
|
| 1222 |
+
error=exc,
|
| 1223 |
+
)
|
| 1224 |
matched = len(items)
|
| 1225 |
+
try:
|
| 1226 |
+
items = ctx._project_repo_items(items[:lim], fields)
|
| 1227 |
+
except ValueError as exc:
|
| 1228 |
+
return ctx._helper_error(
|
| 1229 |
+
start_calls=start_calls,
|
| 1230 |
+
source="/api/trending",
|
| 1231 |
+
error=exc,
|
| 1232 |
+
)
|
| 1233 |
return ctx._helper_success(
|
| 1234 |
start_calls=start_calls,
|
| 1235 |
source="/api/trending",
|
|
|
|
| 1253 |
fields: list[str] | None = None,
|
| 1254 |
) -> dict[str, Any]:
|
| 1255 |
start_calls = ctx.call_count["n"]
|
| 1256 |
+
default_limit = ctx._policy_int("hf_daily_papers", "default_limit", 20)
|
| 1257 |
+
max_limit = ctx._policy_int(
|
| 1258 |
+
"hf_daily_papers", "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
|
| 1259 |
)
|
| 1260 |
+
lim = ctx._clamp_int(limit, default=default_limit, minimum=1, maximum=max_limit)
|
| 1261 |
resp = ctx._host_raw_call("/api/daily_papers", params={"limit": lim})
|
| 1262 |
if not resp.get("ok"):
|
| 1263 |
return ctx._helper_error(
|
|
|
|
| 1271 |
if not isinstance(row, dict):
|
| 1272 |
continue
|
| 1273 |
items.append(ctx._normalize_daily_paper_row(row, rank=idx))
|
| 1274 |
+
try:
|
| 1275 |
+
items = ctx._apply_where(
|
| 1276 |
+
items, where, allowed_fields=DAILY_PAPER_CANONICAL_FIELDS
|
| 1277 |
+
)
|
| 1278 |
+
except ValueError as exc:
|
| 1279 |
+
return ctx._helper_error(
|
| 1280 |
+
start_calls=start_calls,
|
| 1281 |
+
source="/api/daily_papers",
|
| 1282 |
+
error=exc,
|
| 1283 |
+
)
|
| 1284 |
matched = len(items)
|
| 1285 |
+
try:
|
| 1286 |
+
items = ctx._project_daily_paper_items(items[:lim], fields)
|
| 1287 |
+
except ValueError as exc:
|
| 1288 |
+
return ctx._helper_error(
|
| 1289 |
+
start_calls=start_calls,
|
| 1290 |
+
source="/api/daily_papers",
|
| 1291 |
+
error=exc,
|
| 1292 |
+
)
|
| 1293 |
return ctx._helper_success(
|
| 1294 |
start_calls=start_calls,
|
| 1295 |
source="/api/daily_papers",
|
|
|
|
| 1304 |
|
| 1305 |
def register_repo_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[..., Any]]:
|
| 1306 |
return {
|
| 1307 |
+
"hf_models_search": partial(hf_models_search, ctx),
|
| 1308 |
+
"hf_datasets_search": partial(hf_datasets_search, ctx),
|
| 1309 |
+
"hf_spaces_search": partial(hf_spaces_search, ctx),
|
| 1310 |
"hf_repo_search": partial(hf_repo_search, ctx),
|
| 1311 |
"hf_user_likes": partial(hf_user_likes, ctx),
|
| 1312 |
"hf_repo_likers": partial(hf_repo_likers, ctx),
|
monty_api/http_runtime.py
CHANGED
|
@@ -9,7 +9,7 @@ from urllib.request import Request, urlopen
|
|
| 9 |
|
| 10 |
from huggingface_hub import HfApi
|
| 11 |
|
| 12 |
-
from .aliases import REPO_SORT_KEYS
|
| 13 |
from .constants import (
|
| 14 |
DEFAULT_TIMEOUT_SEC,
|
| 15 |
)
|
|
@@ -78,10 +78,14 @@ def _normalize_repo_sort_key(
|
|
| 78 |
if not raw:
|
| 79 |
return None, None
|
| 80 |
|
| 81 |
-
key =
|
| 82 |
-
if key
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
return None, f"Invalid sort key '{raw}'"
|
| 86 |
|
| 87 |
rt = _canonical_repo_type(repo_type)
|
|
|
|
| 9 |
|
| 10 |
from huggingface_hub import HfApi
|
| 11 |
|
| 12 |
+
from .aliases import REPO_SORT_KEYS
|
| 13 |
from .constants import (
|
| 14 |
DEFAULT_TIMEOUT_SEC,
|
| 15 |
)
|
|
|
|
| 78 |
if not raw:
|
| 79 |
return None, None
|
| 80 |
|
| 81 |
+
key = raw
|
| 82 |
+
if key not in {
|
| 83 |
+
"created_at",
|
| 84 |
+
"downloads",
|
| 85 |
+
"last_modified",
|
| 86 |
+
"likes",
|
| 87 |
+
"trending_score",
|
| 88 |
+
}:
|
| 89 |
return None, f"Invalid sort key '{raw}'"
|
| 90 |
|
| 91 |
rt = _canonical_repo_type(repo_type)
|
monty_api/registry.py
CHANGED
|
@@ -8,6 +8,8 @@ from .constants import (
|
|
| 8 |
ACTOR_CANONICAL_FIELDS,
|
| 9 |
COLLECTION_CANONICAL_FIELDS,
|
| 10 |
DAILY_PAPER_CANONICAL_FIELDS,
|
|
|
|
|
|
|
| 11 |
GRAPH_SCAN_LIMIT_CAP,
|
| 12 |
LIKES_ENRICHMENT_MAX_REPOS,
|
| 13 |
LIKES_RANKING_WINDOW_DEFAULT,
|
|
@@ -18,6 +20,7 @@ from .constants import (
|
|
| 18 |
RECENT_ACTIVITY_SCAN_MAX_PAGES,
|
| 19 |
REPO_CANONICAL_FIELDS,
|
| 20 |
TRENDING_ENDPOINT_MAX_LIMIT,
|
|
|
|
| 21 |
)
|
| 22 |
|
| 23 |
|
|
@@ -39,7 +42,6 @@ REPO_SEARCH_EXTRA_ARGS: dict[str, set[str]] = {
|
|
| 39 |
"benchmark",
|
| 40 |
"dataset_name",
|
| 41 |
"expand",
|
| 42 |
-
"filter",
|
| 43 |
"full",
|
| 44 |
"gated",
|
| 45 |
"language",
|
|
@@ -52,11 +54,9 @@ REPO_SEARCH_EXTRA_ARGS: dict[str, set[str]] = {
|
|
| 52 |
"model": {
|
| 53 |
"apps",
|
| 54 |
"cardData",
|
| 55 |
-
"card_data",
|
| 56 |
"emissions_thresholds",
|
| 57 |
"expand",
|
| 58 |
"fetch_config",
|
| 59 |
-
"filter",
|
| 60 |
"full",
|
| 61 |
"gated",
|
| 62 |
"inference",
|
|
@@ -65,7 +65,7 @@ REPO_SEARCH_EXTRA_ARGS: dict[str, set[str]] = {
|
|
| 65 |
"pipeline_tag",
|
| 66 |
"trained_dataset",
|
| 67 |
},
|
| 68 |
-
"space": {"datasets", "expand", "
|
| 69 |
}
|
| 70 |
|
| 71 |
REPO_SEARCH_DEFAULT_EXPAND: dict[str, list[str]] = {
|
|
@@ -206,7 +206,6 @@ RUNTIME_CAPABILITY_FIELDS = [
|
|
| 206 |
"helpers",
|
| 207 |
"helper_defaults",
|
| 208 |
"fields",
|
| 209 |
-
"aliases",
|
| 210 |
"limits",
|
| 211 |
"repo_search",
|
| 212 |
]
|
|
@@ -306,7 +305,7 @@ HELPER_CONFIGS: dict[str, HelperConfig] = {
|
|
| 306 |
"hf_whoami",
|
| 307 |
endpoint_patterns=(r"^/api/whoami-v2$",),
|
| 308 |
default_metadata=_metadata(
|
| 309 |
-
default_fields=["username", "fullname", "
|
| 310 |
guaranteed_fields=["username"],
|
| 311 |
notes="Returns the current authenticated user when a request token is available.",
|
| 312 |
),
|
|
@@ -340,7 +339,55 @@ HELPER_CONFIGS: dict[str, HelperConfig] = {
|
|
| 340 |
max_limit=GRAPH_SCAN_LIMIT_CAP,
|
| 341 |
notes="Returns organization member summary rows.",
|
| 342 |
),
|
| 343 |
-
pagination={"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
),
|
| 345 |
"hf_repo_search": _config(
|
| 346 |
"hf_repo_search",
|
|
@@ -352,11 +399,12 @@ HELPER_CONFIGS: dict[str, HelperConfig] = {
|
|
| 352 |
default_limit=20,
|
| 353 |
max_limit=5_000,
|
| 354 |
notes=(
|
| 355 |
-
"
|
| 356 |
-
"
|
|
|
|
| 357 |
),
|
| 358 |
),
|
| 359 |
-
pagination={"
|
| 360 |
),
|
| 361 |
"hf_user_graph": _config(
|
| 362 |
"hf_user_graph",
|
|
@@ -373,8 +421,8 @@ HELPER_CONFIGS: dict[str, HelperConfig] = {
|
|
| 373 |
notes="Returns followers/following summary rows.",
|
| 374 |
),
|
| 375 |
pagination={
|
| 376 |
-
"
|
| 377 |
-
"
|
| 378 |
"scan_max": GRAPH_SCAN_LIMIT_CAP,
|
| 379 |
},
|
| 380 |
),
|
|
@@ -390,21 +438,13 @@ HELPER_CONFIGS: dict[str, HelperConfig] = {
|
|
| 390 |
default_limit=1_000,
|
| 391 |
notes="Returns users who liked a repo.",
|
| 392 |
),
|
| 393 |
-
pagination={"
|
| 394 |
),
|
| 395 |
"hf_user_likes": _config(
|
| 396 |
"hf_user_likes",
|
| 397 |
endpoint_patterns=(r"^/api/users/[^/]+/likes$",),
|
| 398 |
default_metadata=_metadata(
|
| 399 |
-
default_fields=
|
| 400 |
-
"liked_at",
|
| 401 |
-
"repo_id",
|
| 402 |
-
"repo_type",
|
| 403 |
-
"repo_author",
|
| 404 |
-
"repo_likes",
|
| 405 |
-
"repo_downloads",
|
| 406 |
-
"repo_url",
|
| 407 |
-
],
|
| 408 |
guaranteed_fields=["liked_at", "repo_id", "repo_type"],
|
| 409 |
optional_fields=["repo_author", "repo_likes", "repo_downloads", "repo_url"],
|
| 410 |
default_limit=100,
|
|
@@ -417,7 +457,7 @@ HELPER_CONFIGS: dict[str, HelperConfig] = {
|
|
| 417 |
),
|
| 418 |
),
|
| 419 |
pagination={
|
| 420 |
-
"
|
| 421 |
"enrich_max": LIKES_ENRICHMENT_MAX_REPOS,
|
| 422 |
"ranking_default": LIKES_RANKING_WINDOW_DEFAULT,
|
| 423 |
"scan_max": LIKES_SCAN_LIMIT_CAP,
|
|
@@ -436,7 +476,7 @@ HELPER_CONFIGS: dict[str, HelperConfig] = {
|
|
| 436 |
notes="Activity helper may fetch multiple pages when requested coverage exceeds one page.",
|
| 437 |
),
|
| 438 |
pagination={
|
| 439 |
-
"
|
| 440 |
"max_pages": RECENT_ACTIVITY_SCAN_MAX_PAGES,
|
| 441 |
"page_limit": RECENT_ACTIVITY_PAGE_SIZE,
|
| 442 |
},
|
|
@@ -445,18 +485,9 @@ HELPER_CONFIGS: dict[str, HelperConfig] = {
|
|
| 445 |
"hf_repo_discussions",
|
| 446 |
endpoint_patterns=(r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions$",),
|
| 447 |
default_metadata=_metadata(
|
| 448 |
-
default_fields=
|
| 449 |
-
"num",
|
| 450 |
-
"title",
|
| 451 |
-
"author",
|
| 452 |
-
"status",
|
| 453 |
-
"createdAt",
|
| 454 |
-
"repo_id",
|
| 455 |
-
"repo_type",
|
| 456 |
-
"url",
|
| 457 |
-
],
|
| 458 |
guaranteed_fields=["num", "title", "author", "status"],
|
| 459 |
-
optional_fields=["
|
| 460 |
default_limit=20,
|
| 461 |
max_limit=200,
|
| 462 |
notes="Discussion summary helper.",
|
|
@@ -468,39 +499,13 @@ HELPER_CONFIGS: dict[str, HelperConfig] = {
|
|
| 468 |
r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions/\d+$",
|
| 469 |
),
|
| 470 |
default_metadata=_metadata(
|
| 471 |
-
default_fields=
|
| 472 |
-
"number",
|
| 473 |
-
"discussionNum",
|
| 474 |
-
"id",
|
| 475 |
-
"repo_id",
|
| 476 |
-
"repo_type",
|
| 477 |
-
"title",
|
| 478 |
-
"author",
|
| 479 |
-
"createdAt",
|
| 480 |
-
"status",
|
| 481 |
-
"url",
|
| 482 |
-
"commentCount",
|
| 483 |
-
"latestCommentAuthor",
|
| 484 |
-
"latestCommentCreatedAt",
|
| 485 |
-
"latestCommentText",
|
| 486 |
-
"latestCommentHtml",
|
| 487 |
-
"latest_comment_author",
|
| 488 |
-
"latest_comment_created_at",
|
| 489 |
-
"latest_comment_text",
|
| 490 |
-
"latest_comment_html",
|
| 491 |
-
],
|
| 492 |
guaranteed_fields=["repo_id", "repo_type", "title", "author", "status"],
|
| 493 |
optional_fields=[
|
| 494 |
-
"
|
| 495 |
-
"
|
| 496 |
-
"id",
|
| 497 |
-
"createdAt",
|
| 498 |
"url",
|
| 499 |
-
"
|
| 500 |
-
"latestCommentAuthor",
|
| 501 |
-
"latestCommentCreatedAt",
|
| 502 |
-
"latestCommentText",
|
| 503 |
-
"latestCommentHtml",
|
| 504 |
"latest_comment_author",
|
| 505 |
"latest_comment_created_at",
|
| 506 |
"latest_comment_text",
|
|
@@ -537,7 +542,7 @@ HELPER_CONFIGS: dict[str, HelperConfig] = {
|
|
| 537 |
max_limit=TRENDING_ENDPOINT_MAX_LIMIT,
|
| 538 |
notes="Returns ordered trending summary rows only. Use hf_repo_details for exact repo metadata.",
|
| 539 |
),
|
| 540 |
-
pagination={"
|
| 541 |
),
|
| 542 |
"hf_daily_papers": _config(
|
| 543 |
"hf_daily_papers",
|
|
@@ -550,7 +555,7 @@ HELPER_CONFIGS: dict[str, HelperConfig] = {
|
|
| 550 |
max_limit=OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 551 |
notes="Returns daily paper summary rows. repo_id is omitted unless the upstream payload provides it.",
|
| 552 |
),
|
| 553 |
-
pagination={"
|
| 554 |
),
|
| 555 |
"hf_collections_search": _config(
|
| 556 |
"hf_collections_search",
|
|
@@ -563,7 +568,7 @@ HELPER_CONFIGS: dict[str, HelperConfig] = {
|
|
| 563 |
max_limit=OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 564 |
notes="Collection summary helper.",
|
| 565 |
),
|
| 566 |
-
pagination={"
|
| 567 |
),
|
| 568 |
"hf_collection_items": _config(
|
| 569 |
"hf_collection_items",
|
|
@@ -583,7 +588,7 @@ HELPER_CONFIGS: dict[str, HelperConfig] = {
|
|
| 583 |
max_limit=OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 584 |
notes="Returns repos inside one collection as summary rows.",
|
| 585 |
),
|
| 586 |
-
pagination={"
|
| 587 |
),
|
| 588 |
}
|
| 589 |
|
|
|
|
| 8 |
ACTOR_CANONICAL_FIELDS,
|
| 9 |
COLLECTION_CANONICAL_FIELDS,
|
| 10 |
DAILY_PAPER_CANONICAL_FIELDS,
|
| 11 |
+
DISCUSSION_CANONICAL_FIELDS,
|
| 12 |
+
DISCUSSION_DETAIL_CANONICAL_FIELDS,
|
| 13 |
GRAPH_SCAN_LIMIT_CAP,
|
| 14 |
LIKES_ENRICHMENT_MAX_REPOS,
|
| 15 |
LIKES_RANKING_WINDOW_DEFAULT,
|
|
|
|
| 20 |
RECENT_ACTIVITY_SCAN_MAX_PAGES,
|
| 21 |
REPO_CANONICAL_FIELDS,
|
| 22 |
TRENDING_ENDPOINT_MAX_LIMIT,
|
| 23 |
+
USER_LIKES_CANONICAL_FIELDS,
|
| 24 |
)
|
| 25 |
|
| 26 |
|
|
|
|
| 42 |
"benchmark",
|
| 43 |
"dataset_name",
|
| 44 |
"expand",
|
|
|
|
| 45 |
"full",
|
| 46 |
"gated",
|
| 47 |
"language",
|
|
|
|
| 54 |
"model": {
|
| 55 |
"apps",
|
| 56 |
"cardData",
|
|
|
|
| 57 |
"emissions_thresholds",
|
| 58 |
"expand",
|
| 59 |
"fetch_config",
|
|
|
|
| 60 |
"full",
|
| 61 |
"gated",
|
| 62 |
"inference",
|
|
|
|
| 65 |
"pipeline_tag",
|
| 66 |
"trained_dataset",
|
| 67 |
},
|
| 68 |
+
"space": {"datasets", "expand", "full", "linked", "models"},
|
| 69 |
}
|
| 70 |
|
| 71 |
REPO_SEARCH_DEFAULT_EXPAND: dict[str, list[str]] = {
|
|
|
|
| 206 |
"helpers",
|
| 207 |
"helper_defaults",
|
| 208 |
"fields",
|
|
|
|
| 209 |
"limits",
|
| 210 |
"repo_search",
|
| 211 |
]
|
|
|
|
| 305 |
"hf_whoami",
|
| 306 |
endpoint_patterns=(r"^/api/whoami-v2$",),
|
| 307 |
default_metadata=_metadata(
|
| 308 |
+
default_fields=["username", "fullname", "is_pro"],
|
| 309 |
guaranteed_fields=["username"],
|
| 310 |
notes="Returns the current authenticated user when a request token is available.",
|
| 311 |
),
|
|
|
|
| 339 |
max_limit=GRAPH_SCAN_LIMIT_CAP,
|
| 340 |
notes="Returns organization member summary rows.",
|
| 341 |
),
|
| 342 |
+
pagination={"default_limit": 1_000, "scan_max": GRAPH_SCAN_LIMIT_CAP},
|
| 343 |
+
),
|
| 344 |
+
"hf_models_search": _config(
|
| 345 |
+
"hf_models_search",
|
| 346 |
+
endpoint_patterns=(r"^/api/models$",),
|
| 347 |
+
default_metadata=_metadata(
|
| 348 |
+
default_fields=REPO_SUMMARY_FIELDS,
|
| 349 |
+
guaranteed_fields=["repo_id", "repo_type", "author", "repo_url"],
|
| 350 |
+
optional_fields=REPO_SUMMARY_OPTIONAL_FIELDS,
|
| 351 |
+
default_limit=20,
|
| 352 |
+
max_limit=5_000,
|
| 353 |
+
notes=(
|
| 354 |
+
"Thin model-search wrapper around the Hub list_models path. Prefer this "
|
| 355 |
+
"over hf_repo_search for model-only queries."
|
| 356 |
+
),
|
| 357 |
+
),
|
| 358 |
+
pagination={"default_limit": 20, "max_limit": 5_000},
|
| 359 |
+
),
|
| 360 |
+
"hf_datasets_search": _config(
|
| 361 |
+
"hf_datasets_search",
|
| 362 |
+
endpoint_patterns=(r"^/api/datasets$",),
|
| 363 |
+
default_metadata=_metadata(
|
| 364 |
+
default_fields=REPO_SUMMARY_FIELDS,
|
| 365 |
+
guaranteed_fields=["repo_id", "repo_type", "author", "repo_url"],
|
| 366 |
+
optional_fields=REPO_SUMMARY_OPTIONAL_FIELDS,
|
| 367 |
+
default_limit=20,
|
| 368 |
+
max_limit=5_000,
|
| 369 |
+
notes=(
|
| 370 |
+
"Thin dataset-search wrapper around the Hub list_datasets path. Prefer "
|
| 371 |
+
"this over hf_repo_search for dataset-only queries."
|
| 372 |
+
),
|
| 373 |
+
),
|
| 374 |
+
pagination={"default_limit": 20, "max_limit": 5_000},
|
| 375 |
+
),
|
| 376 |
+
"hf_spaces_search": _config(
|
| 377 |
+
"hf_spaces_search",
|
| 378 |
+
endpoint_patterns=(r"^/api/spaces$",),
|
| 379 |
+
default_metadata=_metadata(
|
| 380 |
+
default_fields=REPO_SUMMARY_FIELDS,
|
| 381 |
+
guaranteed_fields=["repo_id", "repo_type", "author", "repo_url"],
|
| 382 |
+
optional_fields=REPO_SUMMARY_OPTIONAL_FIELDS,
|
| 383 |
+
default_limit=20,
|
| 384 |
+
max_limit=5_000,
|
| 385 |
+
notes=(
|
| 386 |
+
"Thin space-search wrapper around the Hub list_spaces path. Prefer this "
|
| 387 |
+
"over hf_repo_search for space-only queries."
|
| 388 |
+
),
|
| 389 |
+
),
|
| 390 |
+
pagination={"default_limit": 20, "max_limit": 5_000},
|
| 391 |
),
|
| 392 |
"hf_repo_search": _config(
|
| 393 |
"hf_repo_search",
|
|
|
|
| 399 |
default_limit=20,
|
| 400 |
max_limit=5_000,
|
| 401 |
notes=(
|
| 402 |
+
"Small generic repo-search helper. Prefer hf_models_search, "
|
| 403 |
+
"hf_datasets_search, or hf_spaces_search for single-type queries; use "
|
| 404 |
+
"hf_repo_search for intentionally cross-type search."
|
| 405 |
),
|
| 406 |
),
|
| 407 |
+
pagination={"default_limit": 20, "max_limit": 5_000},
|
| 408 |
),
|
| 409 |
"hf_user_graph": _config(
|
| 410 |
"hf_user_graph",
|
|
|
|
| 421 |
notes="Returns followers/following summary rows.",
|
| 422 |
),
|
| 423 |
pagination={
|
| 424 |
+
"default_limit": 1_000,
|
| 425 |
+
"max_limit": GRAPH_SCAN_LIMIT_CAP,
|
| 426 |
"scan_max": GRAPH_SCAN_LIMIT_CAP,
|
| 427 |
},
|
| 428 |
),
|
|
|
|
| 438 |
default_limit=1_000,
|
| 439 |
notes="Returns users who liked a repo.",
|
| 440 |
),
|
| 441 |
+
pagination={"default_limit": 1_000},
|
| 442 |
),
|
| 443 |
"hf_user_likes": _config(
|
| 444 |
"hf_user_likes",
|
| 445 |
endpoint_patterns=(r"^/api/users/[^/]+/likes$",),
|
| 446 |
default_metadata=_metadata(
|
| 447 |
+
default_fields=list(USER_LIKES_CANONICAL_FIELDS),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 448 |
guaranteed_fields=["liked_at", "repo_id", "repo_type"],
|
| 449 |
optional_fields=["repo_author", "repo_likes", "repo_downloads", "repo_url"],
|
| 450 |
default_limit=100,
|
|
|
|
| 457 |
),
|
| 458 |
),
|
| 459 |
pagination={
|
| 460 |
+
"default_limit": 100,
|
| 461 |
"enrich_max": LIKES_ENRICHMENT_MAX_REPOS,
|
| 462 |
"ranking_default": LIKES_RANKING_WINDOW_DEFAULT,
|
| 463 |
"scan_max": LIKES_SCAN_LIMIT_CAP,
|
|
|
|
| 476 |
notes="Activity helper may fetch multiple pages when requested coverage exceeds one page.",
|
| 477 |
),
|
| 478 |
pagination={
|
| 479 |
+
"default_limit": 100,
|
| 480 |
"max_pages": RECENT_ACTIVITY_SCAN_MAX_PAGES,
|
| 481 |
"page_limit": RECENT_ACTIVITY_PAGE_SIZE,
|
| 482 |
},
|
|
|
|
| 485 |
"hf_repo_discussions",
|
| 486 |
endpoint_patterns=(r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions$",),
|
| 487 |
default_metadata=_metadata(
|
| 488 |
+
default_fields=list(DISCUSSION_CANONICAL_FIELDS),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
guaranteed_fields=["num", "title", "author", "status"],
|
| 490 |
+
optional_fields=["repo_id", "repo_type", "created_at", "url"],
|
| 491 |
default_limit=20,
|
| 492 |
max_limit=200,
|
| 493 |
notes="Discussion summary helper.",
|
|
|
|
| 499 |
r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions/\d+$",
|
| 500 |
),
|
| 501 |
default_metadata=_metadata(
|
| 502 |
+
default_fields=list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
guaranteed_fields=["repo_id", "repo_type", "title", "author", "status"],
|
| 504 |
optional_fields=[
|
| 505 |
+
"num",
|
| 506 |
+
"created_at",
|
|
|
|
|
|
|
| 507 |
"url",
|
| 508 |
+
"comment_count",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
"latest_comment_author",
|
| 510 |
"latest_comment_created_at",
|
| 511 |
"latest_comment_text",
|
|
|
|
| 542 |
max_limit=TRENDING_ENDPOINT_MAX_LIMIT,
|
| 543 |
notes="Returns ordered trending summary rows only. Use hf_repo_details for exact repo metadata.",
|
| 544 |
),
|
| 545 |
+
pagination={"default_limit": 20, "max_limit": TRENDING_ENDPOINT_MAX_LIMIT},
|
| 546 |
),
|
| 547 |
"hf_daily_papers": _config(
|
| 548 |
"hf_daily_papers",
|
|
|
|
| 555 |
max_limit=OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 556 |
notes="Returns daily paper summary rows. repo_id is omitted unless the upstream payload provides it.",
|
| 557 |
),
|
| 558 |
+
pagination={"default_limit": 20, "max_limit": OUTPUT_ITEMS_TRUNCATION_LIMIT},
|
| 559 |
),
|
| 560 |
"hf_collections_search": _config(
|
| 561 |
"hf_collections_search",
|
|
|
|
| 568 |
max_limit=OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 569 |
notes="Collection summary helper.",
|
| 570 |
),
|
| 571 |
+
pagination={"default_limit": 20, "max_limit": OUTPUT_ITEMS_TRUNCATION_LIMIT},
|
| 572 |
),
|
| 573 |
"hf_collection_items": _config(
|
| 574 |
"hf_collection_items",
|
|
|
|
| 588 |
max_limit=OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 589 |
notes="Returns repos inside one collection as summary rows.",
|
| 590 |
),
|
| 591 |
+
pagination={"default_limit": 100, "max_limit": OUTPUT_ITEMS_TRUNCATION_LIMIT},
|
| 592 |
),
|
| 593 |
}
|
| 594 |
|
monty_api/runtime_context.py
CHANGED
|
@@ -60,6 +60,8 @@ from .runtime_filtering import (
|
|
| 60 |
_project_activity_items,
|
| 61 |
_project_actor_items,
|
| 62 |
_project_collection_items,
|
|
|
|
|
|
|
| 63 |
_project_daily_paper_items,
|
| 64 |
_project_items,
|
| 65 |
_project_repo_items,
|
|
@@ -215,6 +217,8 @@ for name, value in {
|
|
| 215 |
"_project_items": _project_items,
|
| 216 |
"_project_repo_items": _project_repo_items,
|
| 217 |
"_project_collection_items": _project_collection_items,
|
|
|
|
|
|
|
| 218 |
"_project_daily_paper_items": _project_daily_paper_items,
|
| 219 |
"_project_user_items": _project_user_items,
|
| 220 |
"_project_actor_items": _project_actor_items,
|
|
|
|
| 60 |
_project_activity_items,
|
| 61 |
_project_actor_items,
|
| 62 |
_project_collection_items,
|
| 63 |
+
_project_discussion_detail_items,
|
| 64 |
+
_project_discussion_items,
|
| 65 |
_project_daily_paper_items,
|
| 66 |
_project_items,
|
| 67 |
_project_repo_items,
|
|
|
|
| 217 |
"_project_items": _project_items,
|
| 218 |
"_project_repo_items": _project_repo_items,
|
| 219 |
"_project_collection_items": _project_collection_items,
|
| 220 |
+
"_project_discussion_items": _project_discussion_items,
|
| 221 |
+
"_project_discussion_detail_items": _project_discussion_detail_items,
|
| 222 |
"_project_daily_paper_items": _project_daily_paper_items,
|
| 223 |
"_project_user_items": _project_user_items,
|
| 224 |
"_project_actor_items": _project_actor_items,
|
monty_api/runtime_envelopes.py
CHANGED
|
@@ -21,8 +21,8 @@ def _helper_meta(
|
|
| 21 |
def _derive_limit_metadata(
|
| 22 |
self: Any,
|
| 23 |
*,
|
| 24 |
-
|
| 25 |
-
|
| 26 |
default_limit_used: bool,
|
| 27 |
requested_scan_limit: int | None = None,
|
| 28 |
applied_scan_limit: int | None = None,
|
|
@@ -30,8 +30,8 @@ def _derive_limit_metadata(
|
|
| 30 |
applied_max_pages: int | None = None,
|
| 31 |
) -> dict[str, Any]:
|
| 32 |
meta: dict[str, Any] = {
|
| 33 |
-
"
|
| 34 |
-
"
|
| 35 |
"default_limit_used": default_limit_used,
|
| 36 |
}
|
| 37 |
if requested_scan_limit is not None or applied_scan_limit is not None:
|
|
@@ -42,8 +42,8 @@ def _derive_limit_metadata(
|
|
| 42 |
meta["requested_max_pages"] = requested_max_pages
|
| 43 |
meta["applied_max_pages"] = applied_max_pages
|
| 44 |
meta["page_limit_applied"] = requested_max_pages != applied_max_pages
|
| 45 |
-
if
|
| 46 |
-
meta["hard_cap_applied"] =
|
| 47 |
return meta
|
| 48 |
|
| 49 |
|
|
@@ -68,9 +68,9 @@ def _derive_truncated_by(
|
|
| 68 |
hard_cap: bool = False,
|
| 69 |
scan_limit_hit: bool = False,
|
| 70 |
page_limit_hit: bool = False,
|
| 71 |
-
|
| 72 |
) -> str:
|
| 73 |
-
causes = [hard_cap, scan_limit_hit, page_limit_hit,
|
| 74 |
if sum(1 for cause in causes if cause) > 1:
|
| 75 |
return "multiple"
|
| 76 |
if hard_cap:
|
|
@@ -79,8 +79,8 @@ def _derive_truncated_by(
|
|
| 79 |
return "scan_limit"
|
| 80 |
if page_limit_hit:
|
| 81 |
return "page_limit"
|
| 82 |
-
if
|
| 83 |
-
return "
|
| 84 |
return "none"
|
| 85 |
|
| 86 |
|
|
@@ -89,7 +89,7 @@ def _derive_can_request_more(
|
|
| 89 |
) -> bool:
|
| 90 |
if sample_complete:
|
| 91 |
return False
|
| 92 |
-
return truncated_by in {"
|
| 93 |
|
| 94 |
|
| 95 |
def _derive_next_request_hint(
|
|
@@ -97,12 +97,12 @@ def _derive_next_request_hint(
|
|
| 97 |
*,
|
| 98 |
truncated_by: str,
|
| 99 |
more_available: bool | str,
|
| 100 |
-
|
| 101 |
applied_scan_limit: int | None = None,
|
| 102 |
applied_max_pages: int | None = None,
|
| 103 |
) -> str:
|
| 104 |
-
if truncated_by == "
|
| 105 |
-
return f"Ask for
|
| 106 |
if truncated_by == "scan_limit" and applied_scan_limit is not None:
|
| 107 |
return f"Increase scan_limit above {applied_scan_limit} for broader coverage"
|
| 108 |
if truncated_by == "page_limit" and applied_max_pages is not None:
|
|
@@ -121,28 +121,27 @@ def _derive_next_request_hint(
|
|
| 121 |
def _resolve_exhaustive_limits(
|
| 122 |
self: Any,
|
| 123 |
*,
|
| 124 |
-
|
| 125 |
count_only: bool,
|
| 126 |
-
|
| 127 |
-
|
| 128 |
scan_limit: int | None = None,
|
| 129 |
scan_cap: int | None = None,
|
| 130 |
) -> dict[str, Any]:
|
| 131 |
-
|
| 132 |
-
|
| 133 |
out: dict[str, Any] = {
|
| 134 |
-
"
|
| 135 |
-
"
|
| 136 |
-
|
| 137 |
-
default=
|
| 138 |
minimum=0,
|
| 139 |
-
maximum=
|
| 140 |
),
|
| 141 |
-
"default_limit_used":
|
| 142 |
}
|
| 143 |
out["hard_cap_applied"] = (
|
| 144 |
-
|
| 145 |
-
and out["applied_return_limit"] < requested_return_limit
|
| 146 |
)
|
| 147 |
if scan_cap is not None:
|
| 148 |
out["requested_scan_limit"] = scan_limit
|
|
@@ -168,7 +167,7 @@ def _build_exhaustive_meta(
|
|
| 168 |
applied_max_pages: int | None = None,
|
| 169 |
) -> dict[str, Any]:
|
| 170 |
meta = dict(base_meta)
|
| 171 |
-
|
| 172 |
applied_scan_limit = limit_plan.get("applied_scan_limit")
|
| 173 |
meta.update(
|
| 174 |
{
|
|
@@ -186,7 +185,7 @@ def _build_exhaustive_meta(
|
|
| 186 |
self,
|
| 187 |
truncated_by=truncated_by,
|
| 188 |
more_available=more_available,
|
| 189 |
-
|
| 190 |
applied_scan_limit=applied_scan_limit
|
| 191 |
if isinstance(applied_scan_limit, int)
|
| 192 |
else None,
|
|
@@ -197,8 +196,8 @@ def _build_exhaustive_meta(
|
|
| 197 |
meta.update(
|
| 198 |
_derive_limit_metadata(
|
| 199 |
self,
|
| 200 |
-
|
| 201 |
-
|
| 202 |
default_limit_used=bool(limit_plan["default_limit_used"]),
|
| 203 |
requested_scan_limit=limit_plan.get("requested_scan_limit"),
|
| 204 |
applied_scan_limit=applied_scan_limit
|
|
@@ -263,26 +262,26 @@ def _build_exhaustive_result_meta(
|
|
| 263 |
requested_max_pages: int | None = None,
|
| 264 |
applied_max_pages: int | None = None,
|
| 265 |
) -> dict[str, Any]:
|
| 266 |
-
|
| 267 |
if count_only:
|
| 268 |
effective_sample_complete = exact_count
|
| 269 |
else:
|
| 270 |
effective_sample_complete = (
|
| 271 |
sample_complete
|
| 272 |
if isinstance(sample_complete, bool)
|
| 273 |
-
else exact_count and matched_count <=
|
| 274 |
)
|
| 275 |
-
|
| 276 |
False
|
| 277 |
if count_only
|
| 278 |
-
else (
|
| 279 |
)
|
| 280 |
truncated_by = _derive_truncated_by(
|
| 281 |
self,
|
| 282 |
hard_cap=bool(limit_plan.get("hard_cap_applied")),
|
| 283 |
scan_limit_hit=scan_limit_hit,
|
| 284 |
page_limit_hit=page_limit_hit,
|
| 285 |
-
|
| 286 |
)
|
| 287 |
truncated = truncated_by != "none" or truncated_extra
|
| 288 |
total_value = _as_int(base_meta.get("total"))
|
|
|
|
| 21 |
def _derive_limit_metadata(
|
| 22 |
self: Any,
|
| 23 |
*,
|
| 24 |
+
requested_limit: int | None,
|
| 25 |
+
applied_limit: int,
|
| 26 |
default_limit_used: bool,
|
| 27 |
requested_scan_limit: int | None = None,
|
| 28 |
applied_scan_limit: int | None = None,
|
|
|
|
| 30 |
applied_max_pages: int | None = None,
|
| 31 |
) -> dict[str, Any]:
|
| 32 |
meta: dict[str, Any] = {
|
| 33 |
+
"requested_limit": requested_limit,
|
| 34 |
+
"applied_limit": applied_limit,
|
| 35 |
"default_limit_used": default_limit_used,
|
| 36 |
}
|
| 37 |
if requested_scan_limit is not None or applied_scan_limit is not None:
|
|
|
|
| 42 |
meta["requested_max_pages"] = requested_max_pages
|
| 43 |
meta["applied_max_pages"] = applied_max_pages
|
| 44 |
meta["page_limit_applied"] = requested_max_pages != applied_max_pages
|
| 45 |
+
if requested_limit is not None:
|
| 46 |
+
meta["hard_cap_applied"] = applied_limit < requested_limit
|
| 47 |
return meta
|
| 48 |
|
| 49 |
|
|
|
|
| 68 |
hard_cap: bool = False,
|
| 69 |
scan_limit_hit: bool = False,
|
| 70 |
page_limit_hit: bool = False,
|
| 71 |
+
limit_hit: bool = False,
|
| 72 |
) -> str:
|
| 73 |
+
causes = [hard_cap, scan_limit_hit, page_limit_hit, limit_hit]
|
| 74 |
if sum(1 for cause in causes if cause) > 1:
|
| 75 |
return "multiple"
|
| 76 |
if hard_cap:
|
|
|
|
| 79 |
return "scan_limit"
|
| 80 |
if page_limit_hit:
|
| 81 |
return "page_limit"
|
| 82 |
+
if limit_hit:
|
| 83 |
+
return "limit"
|
| 84 |
return "none"
|
| 85 |
|
| 86 |
|
|
|
|
| 89 |
) -> bool:
|
| 90 |
if sample_complete:
|
| 91 |
return False
|
| 92 |
+
return truncated_by in {"limit", "scan_limit", "page_limit", "multiple"}
|
| 93 |
|
| 94 |
|
| 95 |
def _derive_next_request_hint(
|
|
|
|
| 97 |
*,
|
| 98 |
truncated_by: str,
|
| 99 |
more_available: bool | str,
|
| 100 |
+
applied_limit: int,
|
| 101 |
applied_scan_limit: int | None = None,
|
| 102 |
applied_max_pages: int | None = None,
|
| 103 |
) -> str:
|
| 104 |
+
if truncated_by == "limit":
|
| 105 |
+
return f"Ask for limit>{applied_limit} to see more rows"
|
| 106 |
if truncated_by == "scan_limit" and applied_scan_limit is not None:
|
| 107 |
return f"Increase scan_limit above {applied_scan_limit} for broader coverage"
|
| 108 |
if truncated_by == "page_limit" and applied_max_pages is not None:
|
|
|
|
| 121 |
def _resolve_exhaustive_limits(
|
| 122 |
self: Any,
|
| 123 |
*,
|
| 124 |
+
limit: int | None,
|
| 125 |
count_only: bool,
|
| 126 |
+
default_limit: int,
|
| 127 |
+
max_limit: int,
|
| 128 |
scan_limit: int | None = None,
|
| 129 |
scan_cap: int | None = None,
|
| 130 |
) -> dict[str, Any]:
|
| 131 |
+
requested_limit = None if count_only else limit
|
| 132 |
+
effective_requested_limit = 0 if count_only else requested_limit
|
| 133 |
out: dict[str, Any] = {
|
| 134 |
+
"requested_limit": requested_limit,
|
| 135 |
+
"applied_limit": _clamp_int(
|
| 136 |
+
effective_requested_limit,
|
| 137 |
+
default=default_limit,
|
| 138 |
minimum=0,
|
| 139 |
+
maximum=max_limit,
|
| 140 |
),
|
| 141 |
+
"default_limit_used": requested_limit is None and not count_only,
|
| 142 |
}
|
| 143 |
out["hard_cap_applied"] = (
|
| 144 |
+
requested_limit is not None and out["applied_limit"] < requested_limit
|
|
|
|
| 145 |
)
|
| 146 |
if scan_cap is not None:
|
| 147 |
out["requested_scan_limit"] = scan_limit
|
|
|
|
| 167 |
applied_max_pages: int | None = None,
|
| 168 |
) -> dict[str, Any]:
|
| 169 |
meta = dict(base_meta)
|
| 170 |
+
applied_limit = int(limit_plan["applied_limit"])
|
| 171 |
applied_scan_limit = limit_plan.get("applied_scan_limit")
|
| 172 |
meta.update(
|
| 173 |
{
|
|
|
|
| 185 |
self,
|
| 186 |
truncated_by=truncated_by,
|
| 187 |
more_available=more_available,
|
| 188 |
+
applied_limit=applied_limit,
|
| 189 |
applied_scan_limit=applied_scan_limit
|
| 190 |
if isinstance(applied_scan_limit, int)
|
| 191 |
else None,
|
|
|
|
| 196 |
meta.update(
|
| 197 |
_derive_limit_metadata(
|
| 198 |
self,
|
| 199 |
+
requested_limit=limit_plan["requested_limit"],
|
| 200 |
+
applied_limit=applied_limit,
|
| 201 |
default_limit_used=bool(limit_plan["default_limit_used"]),
|
| 202 |
requested_scan_limit=limit_plan.get("requested_scan_limit"),
|
| 203 |
applied_scan_limit=applied_scan_limit
|
|
|
|
| 262 |
requested_max_pages: int | None = None,
|
| 263 |
applied_max_pages: int | None = None,
|
| 264 |
) -> dict[str, Any]:
|
| 265 |
+
applied_limit = int(limit_plan["applied_limit"])
|
| 266 |
if count_only:
|
| 267 |
effective_sample_complete = exact_count
|
| 268 |
else:
|
| 269 |
effective_sample_complete = (
|
| 270 |
sample_complete
|
| 271 |
if isinstance(sample_complete, bool)
|
| 272 |
+
else exact_count and matched_count <= applied_limit
|
| 273 |
)
|
| 274 |
+
limit_hit = (
|
| 275 |
False
|
| 276 |
if count_only
|
| 277 |
+
else (applied_limit > 0 and matched_count > applied_limit)
|
| 278 |
)
|
| 279 |
truncated_by = _derive_truncated_by(
|
| 280 |
self,
|
| 281 |
hard_cap=bool(limit_plan.get("hard_cap_applied")),
|
| 282 |
scan_limit_hit=scan_limit_hit,
|
| 283 |
page_limit_hit=page_limit_hit,
|
| 284 |
+
limit_hit=limit_hit,
|
| 285 |
)
|
| 286 |
truncated = truncated_by != "none" or truncated_extra
|
| 287 |
total_value = _as_int(base_meta.get("total"))
|
monty_api/runtime_filtering.py
CHANGED
|
@@ -2,40 +2,48 @@ from __future__ import annotations
|
|
| 2 |
|
| 3 |
from typing import Any
|
| 4 |
|
| 5 |
-
from .
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
| 13 |
)
|
| 14 |
from .http_runtime import _as_int
|
| 15 |
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
def _project_items(
|
| 18 |
self: Any,
|
| 19 |
items: list[dict[str, Any]],
|
| 20 |
fields: list[str] | None,
|
| 21 |
-
|
|
|
|
| 22 |
) -> list[dict[str, Any]]:
|
| 23 |
if not isinstance(fields, list) or not fields:
|
| 24 |
return items
|
| 25 |
wanted = [str(field).strip() for field in fields if str(field).strip()]
|
| 26 |
if not wanted:
|
| 27 |
return items
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
if
|
| 32 |
-
|
|
|
|
|
|
|
| 33 |
projected: list[dict[str, Any]] = []
|
| 34 |
for row in items:
|
| 35 |
out: dict[str, Any] = {}
|
| 36 |
for key in wanted:
|
| 37 |
-
|
| 38 |
-
value = row.get(source_key)
|
| 39 |
if value is None:
|
| 40 |
continue
|
| 41 |
out[key] = value
|
|
@@ -46,63 +54,88 @@ def _project_items(
|
|
| 46 |
def _project_repo_items(
|
| 47 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 48 |
) -> list[dict[str, Any]]:
|
| 49 |
-
return _project_items(self, items, fields,
|
| 50 |
|
| 51 |
|
| 52 |
def _project_collection_items(
|
| 53 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 54 |
) -> list[dict[str, Any]]:
|
| 55 |
-
return _project_items(
|
|
|
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
def _project_daily_paper_items(
|
| 59 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 60 |
) -> list[dict[str, Any]]:
|
| 61 |
-
return _project_items(
|
|
|
|
|
|
|
| 62 |
|
| 63 |
|
| 64 |
def _project_user_items(
|
| 65 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 66 |
) -> list[dict[str, Any]]:
|
| 67 |
-
return _project_items(self, items, fields,
|
| 68 |
|
| 69 |
|
| 70 |
def _project_actor_items(
|
| 71 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 72 |
) -> list[dict[str, Any]]:
|
| 73 |
-
return _project_items(self, items, fields,
|
| 74 |
|
| 75 |
|
| 76 |
def _project_user_like_items(
|
| 77 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 78 |
) -> list[dict[str, Any]]:
|
| 79 |
-
return _project_items(
|
|
|
|
|
|
|
| 80 |
|
| 81 |
|
| 82 |
def _project_activity_items(
|
| 83 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 84 |
) -> list[dict[str, Any]]:
|
| 85 |
-
return _project_items(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
|
| 88 |
def _normalize_where(
|
| 89 |
self: Any,
|
| 90 |
where: dict[str, Any] | None,
|
| 91 |
-
|
|
|
|
| 92 |
) -> dict[str, Any] | None:
|
| 93 |
if not isinstance(where, dict) or not where:
|
| 94 |
return where
|
| 95 |
-
|
| 96 |
-
str(key).strip().lower(): str(value).strip()
|
| 97 |
-
for key, value in (aliases or {}).items()
|
| 98 |
-
if str(key).strip() and str(value).strip()
|
| 99 |
-
}
|
| 100 |
normalized: dict[str, Any] = {}
|
| 101 |
for key, value in where.items():
|
| 102 |
raw_key = str(key).strip()
|
| 103 |
if not raw_key:
|
| 104 |
continue
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
return normalized
|
| 107 |
|
| 108 |
|
|
@@ -161,9 +194,9 @@ def _apply_where(
|
|
| 161 |
items: list[dict[str, Any]],
|
| 162 |
where: dict[str, Any] | None,
|
| 163 |
*,
|
| 164 |
-
|
| 165 |
) -> list[dict[str, Any]]:
|
| 166 |
-
normalized_where = _normalize_where(self, where,
|
| 167 |
if not isinstance(normalized_where, dict) or not normalized_where:
|
| 168 |
return items
|
| 169 |
return [row for row in items if _item_matches_where(self, row, normalized_where)]
|
|
|
|
| 2 |
|
| 3 |
from typing import Any
|
| 4 |
|
| 5 |
+
from .constants import (
|
| 6 |
+
ACTIVITY_CANONICAL_FIELDS,
|
| 7 |
+
ACTOR_CANONICAL_FIELDS,
|
| 8 |
+
COLLECTION_CANONICAL_FIELDS,
|
| 9 |
+
DAILY_PAPER_CANONICAL_FIELDS,
|
| 10 |
+
DISCUSSION_CANONICAL_FIELDS,
|
| 11 |
+
DISCUSSION_DETAIL_CANONICAL_FIELDS,
|
| 12 |
+
REPO_CANONICAL_FIELDS,
|
| 13 |
+
USER_CANONICAL_FIELDS,
|
| 14 |
+
USER_LIKES_CANONICAL_FIELDS,
|
| 15 |
)
|
| 16 |
from .http_runtime import _as_int
|
| 17 |
|
| 18 |
|
| 19 |
+
def _allowed_field_set(allowed_fields: tuple[str, ...] | list[str] | set[str]) -> set[str]:
|
| 20 |
+
return {str(field).strip() for field in allowed_fields if str(field).strip()}
|
| 21 |
+
|
| 22 |
+
|
| 23 |
def _project_items(
|
| 24 |
self: Any,
|
| 25 |
items: list[dict[str, Any]],
|
| 26 |
fields: list[str] | None,
|
| 27 |
+
*,
|
| 28 |
+
allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
|
| 29 |
) -> list[dict[str, Any]]:
|
| 30 |
if not isinstance(fields, list) or not fields:
|
| 31 |
return items
|
| 32 |
wanted = [str(field).strip() for field in fields if str(field).strip()]
|
| 33 |
if not wanted:
|
| 34 |
return items
|
| 35 |
+
if allowed_fields is not None:
|
| 36 |
+
allowed = _allowed_field_set(allowed_fields)
|
| 37 |
+
invalid = sorted(field for field in wanted if field not in allowed)
|
| 38 |
+
if invalid:
|
| 39 |
+
raise ValueError(
|
| 40 |
+
f"Unsupported fields {invalid}. Allowed fields: {sorted(allowed)}"
|
| 41 |
+
)
|
| 42 |
projected: list[dict[str, Any]] = []
|
| 43 |
for row in items:
|
| 44 |
out: dict[str, Any] = {}
|
| 45 |
for key in wanted:
|
| 46 |
+
value = row.get(key)
|
|
|
|
| 47 |
if value is None:
|
| 48 |
continue
|
| 49 |
out[key] = value
|
|
|
|
| 54 |
def _project_repo_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project repo rows via ``_project_items``, restricted to ``REPO_CANONICAL_FIELDS``."""
    return _project_items(
        self,
        items,
        fields,
        allowed_fields=REPO_CANONICAL_FIELDS,
    )
|
| 58 |
|
| 59 |
|
| 60 |
def _project_collection_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project collection rows via ``_project_items``, restricted to ``COLLECTION_CANONICAL_FIELDS``."""
    return _project_items(
        self,
        items,
        fields,
        allowed_fields=COLLECTION_CANONICAL_FIELDS,
    )
|
| 66 |
|
| 67 |
|
| 68 |
def _project_daily_paper_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project daily-paper rows via ``_project_items``, restricted to ``DAILY_PAPER_CANONICAL_FIELDS``."""
    return _project_items(
        self,
        items,
        fields,
        allowed_fields=DAILY_PAPER_CANONICAL_FIELDS,
    )
|
| 74 |
|
| 75 |
|
| 76 |
def _project_user_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project user rows via ``_project_items``, restricted to ``USER_CANONICAL_FIELDS``."""
    return _project_items(
        self,
        items,
        fields,
        allowed_fields=USER_CANONICAL_FIELDS,
    )
|
| 80 |
|
| 81 |
|
| 82 |
def _project_actor_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project actor rows via ``_project_items``, restricted to ``ACTOR_CANONICAL_FIELDS``."""
    return _project_items(
        self,
        items,
        fields,
        allowed_fields=ACTOR_CANONICAL_FIELDS,
    )
|
| 86 |
|
| 87 |
|
| 88 |
def _project_user_like_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project user-like rows via ``_project_items``, restricted to ``USER_LIKES_CANONICAL_FIELDS``."""
    return _project_items(
        self,
        items,
        fields,
        allowed_fields=USER_LIKES_CANONICAL_FIELDS,
    )
|
| 94 |
|
| 95 |
|
| 96 |
def _project_activity_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project activity rows via ``_project_items``, restricted to ``ACTIVITY_CANONICAL_FIELDS``."""
    return _project_items(
        self,
        items,
        fields,
        allowed_fields=ACTIVITY_CANONICAL_FIELDS,
    )
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def _project_discussion_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project discussion rows via ``_project_items``, restricted to ``DISCUSSION_CANONICAL_FIELDS``."""
    return _project_items(
        self,
        items,
        fields,
        allowed_fields=DISCUSSION_CANONICAL_FIELDS,
    )
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def _project_discussion_detail_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project discussion-detail rows via ``_project_items``, restricted to ``DISCUSSION_DETAIL_CANONICAL_FIELDS``."""
    return _project_items(
        self,
        items,
        fields,
        allowed_fields=DISCUSSION_DETAIL_CANONICAL_FIELDS,
    )
|
| 118 |
|
| 119 |
|
| 120 |
def _normalize_where(
|
| 121 |
self: Any,
|
| 122 |
where: dict[str, Any] | None,
|
| 123 |
+
*,
|
| 124 |
+
allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
|
| 125 |
) -> dict[str, Any] | None:
|
| 126 |
if not isinstance(where, dict) or not where:
|
| 127 |
return where
|
| 128 |
+
allowed = _allowed_field_set(allowed_fields) if allowed_fields is not None else None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
normalized: dict[str, Any] = {}
|
| 130 |
for key, value in where.items():
|
| 131 |
raw_key = str(key).strip()
|
| 132 |
if not raw_key:
|
| 133 |
continue
|
| 134 |
+
if allowed is not None and raw_key not in allowed:
|
| 135 |
+
raise ValueError(
|
| 136 |
+
f"Unsupported filter fields {[raw_key]}. Allowed fields: {sorted(allowed)}"
|
| 137 |
+
)
|
| 138 |
+
normalized[raw_key] = value
|
| 139 |
return normalized
|
| 140 |
|
| 141 |
|
|
|
|
| 194 |
items: list[dict[str, Any]],
|
| 195 |
where: dict[str, Any] | None,
|
| 196 |
*,
|
| 197 |
+
allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
|
| 198 |
) -> list[dict[str, Any]]:
|
| 199 |
+
normalized_where = _normalize_where(self, where, allowed_fields=allowed_fields)
|
| 200 |
if not isinstance(normalized_where, dict) or not normalized_where:
|
| 201 |
return items
|
| 202 |
return [row for row in items if _item_matches_where(self, row, normalized_where)]
|
monty_api/validation.py
CHANGED
|
@@ -155,8 +155,8 @@ def _summarize_limit_hit(helper_name: str, result: Any) -> dict[str, Any] | None
|
|
| 155 |
"truncated": meta.get("truncated"),
|
| 156 |
"truncated_by": meta.get("truncated_by"),
|
| 157 |
"more_available": meta.get("more_available"),
|
| 158 |
-
"
|
| 159 |
-
"
|
| 160 |
"next_request_hint": meta.get("next_request_hint"),
|
| 161 |
}
|
| 162 |
if meta.get("scan_limit") is not None:
|
|
|
|
| 155 |
"truncated": meta.get("truncated"),
|
| 156 |
"truncated_by": meta.get("truncated_by"),
|
| 157 |
"more_available": meta.get("more_available"),
|
| 158 |
+
"requested_limit": meta.get("requested_limit"),
|
| 159 |
+
"applied_limit": meta.get("applied_limit"),
|
| 160 |
"next_request_hint": meta.get("next_request_hint"),
|
| 161 |
}
|
| 162 |
if meta.get("scan_limit") is not None:
|