evalstate HF Staff committed on
Commit
9404142
·
verified ·
1 Parent(s): 4512510

Deploy hf-hub-query bundle for f8275ae

Browse files
Files changed (2) hide show
  1. _monty_codegen_shared.md +23 -0
  2. monty_api_tool_v2.py +47 -5
_monty_codegen_shared.md CHANGED
@@ -180,6 +180,7 @@ await hf_repo_discussion_details(
180
 
181
  await hf_collections_search(
182
  query: str | None = None,
 
183
  return_limit: int = 20,
184
  count_only: bool = False,
185
  where: dict | None = None,
@@ -254,6 +255,26 @@ Common aliases tolerated in `fields=[...]` include:
254
  When returning compact repo objects, omit unavailable optional fields instead of
255
  emitting `null` placeholders unless the user explicitly asked for a fixed schema.
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  ## Common user overview fields
258
  `hf_user_summary(... )["item"]["overview"]` commonly includes:
259
  - `username`
@@ -302,6 +323,7 @@ Choose the helper based on the **subject of the question** and the **smallest he
302
  - Organization details / counts → `hf_org_overview(...)`
303
  - Organization members → `hf_org_members(...)`
304
  - Organization repos → `hf_repo_search(author="<org>", repo_types=["model", "dataset", "space"])`
 
305
 
306
  ### Relationship direction matters
307
  - `hf_user_likes(...)` = **user → repos**
@@ -335,6 +357,7 @@ Pick the helper that already matches the direction of the question instead of tr
335
  - For "my/me" prompts, prefer current-user forms first: `hf_user_summary(username=None)`, `hf_user_graph(username=None, ...)`, and `hf_user_likes(username=None, ...)`. Use `hf_whoami()` when you need the resolved username explicitly.
336
  - Use `hf_org_overview(...)` for organization details like display name, followers, and member count.
337
  - Use `hf_org_members(...)` for organization member lists and counts. Member rows use `username`, `fullname`, `isPro`, and `role`; common aliases like `login`, `name`, and `is_pro` are tolerated in `fields=[...]`.
 
338
  - Use `hf_user_graph(...)` for follower/following lists, counts, and filtered graph samples. Prefer `relation=` over trying undocumented helper names.
339
  - Use `hf_repo_likers(...)` for "who liked this repo?" prompts. It returns liker rows for a specific model, dataset, or space; pass `repo_type` explicitly.
340
  - For overlap/comparison/ranking tasks over followers, org members, likes, or activity, do not use small manual `return_limit` values like 10/20/50 unless the user explicitly asked for a sample. Use the helper default or a clearly high bound for the intermediate analysis, then keep only the final displayed result compact.
 
180
 
181
  await hf_collections_search(
182
  query: str | None = None,
183
+ owner: str | None = None,
184
  return_limit: int = 20,
185
  count_only: bool = False,
186
  where: dict | None = None,
 
255
  When returning compact repo objects, omit unavailable optional fields instead of
256
  emitting `null` placeholders unless the user explicitly asked for a fixed schema.
257
 
258
+ ## Common collection fields
259
+ Collection search rows commonly include:
260
+ - `collection_id`
261
+ - `slug`
262
+ - `title`
263
+ - `owner`
264
+ - `owner_type`
265
+ - `description`
266
+ - `gating`
267
+ - `last_updated`
268
+ - `item_count`
269
+
270
+ For collection helpers, prefer the canonical names above in generated code and in `fields=[...]`.
271
+ Common aliases tolerated in `fields=[...]` include:
272
+ - `collectionId` → `collection_id`
273
+ - `lastUpdated` → `last_updated`
274
+ - `ownerType` → `owner_type`
275
+ - `itemCount` → `item_count`
276
+ - `author` → `owner`
277
+
278
  ## Common user overview fields
279
  `hf_user_summary(... )["item"]["overview"]` commonly includes:
280
  - `username`
 
323
  - Organization details / counts → `hf_org_overview(...)`
324
  - Organization members → `hf_org_members(...)`
325
  - Organization repos → `hf_repo_search(author="<org>", repo_types=["model", "dataset", "space"])`
326
+ - Organization/user collections → `hf_collections_search(owner="<org-or-user>", ...)`
327
 
328
  ### Relationship direction matters
329
  - `hf_user_likes(...)` = **user → repos**
 
357
  - For "my/me" prompts, prefer current-user forms first: `hf_user_summary(username=None)`, `hf_user_graph(username=None, ...)`, and `hf_user_likes(username=None, ...)`. Use `hf_whoami()` when you need the resolved username explicitly.
358
  - Use `hf_org_overview(...)` for organization details like display name, followers, and member count.
359
  - Use `hf_org_members(...)` for organization member lists and counts. Member rows use `username`, `fullname`, `isPro`, and `role`; common aliases like `login`, `name`, and `is_pro` are tolerated in `fields=[...]`.
360
+ - Use `hf_collections_search(...)` for collection search/listing questions. For "what collections does this org/user have?" prompts, pass `owner="<org-or-user>"`: when no `query` is given, the helper uses the owner name as the search term, and it then keeps only rows whose owner matches `owner` exactly (case-sensitive) on the client side. Prefer fields like `collection_id`, `title`, `owner`, `description`, `last_updated`, and `item_count`.
361
  - Use `hf_user_graph(...)` for follower/following lists, counts, and filtered graph samples. Prefer `relation=` over trying undocumented helper names.
362
  - Use `hf_repo_likers(...)` for "who liked this repo?" prompts. It returns liker rows for a specific model, dataset, or space; pass `repo_type` explicitly.
363
  - For overlap/comparison/ranking tasks over followers, org members, likes, or activity, do not use small manual `return_limit` values like 10/20/50 unless the user explicitly asked for a sample. Use the helper default or a clearly high bound for the intermediate analysis, then keep only the final displayed result compact.
monty_api_tool_v2.py CHANGED
@@ -143,6 +143,14 @@ _REPO_FIELD_ALIASES: dict[str, str] = {
143
  "paperswithcodeid": "paperswithcode_id",
144
  }
145
 
 
 
 
 
 
 
 
 
146
  # Extra hf_repo_search kwargs intentionally supported as pass-through to
147
  # huggingface_hub.HfApi.list_models/list_datasets/list_spaces.
148
  # (Generic args like `query/search/sort/author/limit` are handled directly in
@@ -1311,6 +1319,9 @@ async def _run_with_monty(
1311
  def _project_repo_items(items: list[dict[str, Any]], fields: list[str] | None) -> list[dict[str, Any]]:
1312
  return _project_items(items, fields, aliases=_REPO_FIELD_ALIASES)
1313
 
 
 
 
1314
  def _project_user_items(items: list[dict[str, Any]], fields: list[str] | None) -> list[dict[str, Any]]:
1315
  return _project_items(items, fields, aliases=_USER_FIELD_ALIASES)
1316
 
@@ -3146,6 +3157,7 @@ async def _run_with_monty(
3146
 
3147
  async def hf_collections_search(
3148
  query: str | None = None,
 
3149
  return_limit: int = 20,
3150
  count_only: bool = False,
3151
  where: dict[str, Any] | None = None,
@@ -3159,13 +3171,24 @@ async def _run_with_monty(
3159
  return_limit = 0
3160
 
3161
  lim = _clamp_int(return_limit, default=default_return, minimum=0, maximum=max_return)
3162
- fetch_lim = max_return if lim == 0 else lim
 
 
 
3163
 
3164
  term = str(query or "").strip()
 
 
3165
  if not term:
3166
- return _helper_error(start_calls=start_calls, source="/api/collections", error="query is required")
3167
 
3168
- resp = _host_raw_call("/api/collections", params={"q": term, "limit": fetch_lim})
 
 
 
 
 
 
3169
  if not resp.get("ok"):
3170
  return _helper_error(
3171
  start_calls=start_calls,
@@ -3181,12 +3204,30 @@ async def _run_with_monty(
3181
  owner = _author_from_any(row.get("owner")) or _author_from_any(row.get("ownerData"))
3182
  if not owner and isinstance(row.get("slug"), str) and "/" in str(row.get("slug")):
3183
  owner = str(row.get("slug")).split("/", 1)[0]
3184
- items.append({"slug": row.get("slug"), "title": row.get("title"), "owner": owner})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3185
 
3186
  items = _apply_where(items, where)
3187
  total_matched = len(items)
3188
  items = items[:lim]
3189
- items = _project_items(items, fields)
3190
  truncated = (lim > 0 and total_matched > lim) or (lim == 0 and len(payload) >= fetch_lim)
3191
 
3192
  return _helper_success(
@@ -3202,6 +3243,7 @@ async def _run_with_monty(
3202
  truncated=truncated,
3203
  complete=not truncated,
3204
  query=term,
 
3205
  )
3206
 
3207
  m = pydantic_monty.Monty(
 
143
  "paperswithcodeid": "paperswithcode_id",
144
  }
145
 
146
+ _COLLECTION_FIELD_ALIASES: dict[str, str] = {
147
+ "collectionid": "collection_id",
148
+ "lastupdated": "last_updated",
149
+ "ownertype": "owner_type",
150
+ "itemcount": "item_count",
151
+ "author": "owner",
152
+ }
153
+
154
  # Extra hf_repo_search kwargs intentionally supported as pass-through to
155
  # huggingface_hub.HfApi.list_models/list_datasets/list_spaces.
156
  # (Generic args like `query/search/sort/author/limit` are handled directly in
 
1319
  def _project_repo_items(items: list[dict[str, Any]], fields: list[str] | None) -> list[dict[str, Any]]:
1320
  return _project_items(items, fields, aliases=_REPO_FIELD_ALIASES)
1321
 
1322
+ def _project_collection_items(items: list[dict[str, Any]], fields: list[str] | None) -> list[dict[str, Any]]:
1323
+ return _project_items(items, fields, aliases=_COLLECTION_FIELD_ALIASES)
1324
+
1325
  def _project_user_items(items: list[dict[str, Any]], fields: list[str] | None) -> list[dict[str, Any]]:
1326
  return _project_items(items, fields, aliases=_USER_FIELD_ALIASES)
1327
 
 
3157
 
3158
  async def hf_collections_search(
3159
  query: str | None = None,
3160
+ owner: str | None = None,
3161
  return_limit: int = 20,
3162
  count_only: bool = False,
3163
  where: dict[str, Any] | None = None,
 
3171
  return_limit = 0
3172
 
3173
  lim = _clamp_int(return_limit, default=default_return, minimum=0, maximum=max_return)
3174
+ owner_clean = str(owner or "").strip() or None
3175
+ fetch_lim = max_return if lim == 0 or owner_clean else lim
3176
+ if owner_clean:
3177
+ fetch_lim = min(fetch_lim, 100)
3178
 
3179
  term = str(query or "").strip()
3180
+ if not term and owner_clean:
3181
+ term = owner_clean
3182
  if not term:
3183
+ return _helper_error(start_calls=start_calls, source="/api/collections", error="query or owner is required")
3184
 
3185
+ params: dict[str, Any] = {"limit": fetch_lim}
3186
+ if term:
3187
+ params["q"] = term
3188
+ if owner_clean:
3189
+ params["owner"] = owner_clean
3190
+
3191
+ resp = _host_raw_call("/api/collections", params=params)
3192
  if not resp.get("ok"):
3193
  return _helper_error(
3194
  start_calls=start_calls,
 
3204
  owner = _author_from_any(row.get("owner")) or _author_from_any(row.get("ownerData"))
3205
  if not owner and isinstance(row.get("slug"), str) and "/" in str(row.get("slug")):
3206
  owner = str(row.get("slug")).split("/", 1)[0]
3207
+ if owner_clean is not None and owner != owner_clean:
3208
+ continue
3209
+
3210
+ owner_payload = row.get("owner") if isinstance(row.get("owner"), dict) else {}
3211
+ collection_items = row.get("items") if isinstance(row.get("items"), list) else []
3212
+ slug = row.get("slug")
3213
+ items.append(
3214
+ {
3215
+ "collection_id": slug,
3216
+ "slug": slug,
3217
+ "title": row.get("title"),
3218
+ "owner": owner,
3219
+ "owner_type": owner_payload.get("type") if isinstance(owner_payload.get("type"), str) else None,
3220
+ "description": row.get("description"),
3221
+ "gating": row.get("gating"),
3222
+ "last_updated": row.get("lastUpdated"),
3223
+ "item_count": len(collection_items),
3224
+ }
3225
+ )
3226
 
3227
  items = _apply_where(items, where)
3228
  total_matched = len(items)
3229
  items = items[:lim]
3230
+ items = _project_collection_items(items, fields)
3231
  truncated = (lim > 0 and total_matched > lim) or (lim == 0 and len(payload) >= fetch_lim)
3232
 
3233
  return _helper_success(
 
3243
  truncated=truncated,
3244
  complete=not truncated,
3245
  query=term,
3246
+ owner=owner_clean,
3247
  )
3248
 
3249
  m = pydantic_monty.Monty(