evalstate HF Staff committed on
Commit
f4d438b
·
verified ·
1 Parent(s): 607531c

Deploy structured hf_api_request docstring

Browse files
Files changed (1) hide show
  1. hf_api_tool.py +309 -35
hf_api_tool.py CHANGED
@@ -46,6 +46,8 @@ ALLOWED_ENDPOINT_PATTERNS: list[str] = [
46
  r"^/collections/[^/]+/items$",
47
  # Auth check
48
  r"^/(models|datasets|spaces)/[^/]+/[^/]+/auth-check$",
 
 
49
  ]
50
 
51
  _COMPILED_PATTERNS: list[re.Pattern[str]] = [
@@ -151,40 +153,13 @@ def _build_url(endpoint: str, params: dict[str, Any] | None) -> str:
151
  return url
152
 
153
 
154
- def hf_api_request(
155
- endpoint: str,
156
- method: str = "GET",
157
- params: dict[str, Any] | None = None,
158
- json_body: dict[str, Any] | None = None,
159
- max_results: int | None = None,
160
- offset: int | None = None,
161
- ) -> dict[str, Any]:
162
- """
163
- Call the Hugging Face Hub API (GET/POST only).
164
-
165
- Args:
166
- endpoint: API endpoint relative to /api (e.g. "/whoami-v2").
167
- method: HTTP method (GET or POST).
168
- params: Optional query parameters.
169
- json_body: Optional JSON payload for POST requests.
170
- max_results: Max results when response is a list (defaults to HF_MAX_RESULTS).
171
- offset: Client-side offset when response is a list (defaults to 0).
172
-
173
- Returns:
174
- A dict with the response data and request metadata.
175
- """
176
- method_upper = method.upper()
177
- if method_upper not in {"GET", "POST"}:
178
- raise ValueError("Only GET and POST are allowed for hf_api_request.")
179
-
180
- if method_upper == "GET" and json_body is not None:
181
- raise ValueError("GET requests do not accept json_body.")
182
-
183
- url = _build_url(endpoint, params)
184
-
185
- headers = {
186
- "Accept": "application/json",
187
- }
188
  token = _load_token()
189
  if token:
190
  headers["Authorization"] = f"Bearer {token}"
@@ -211,14 +186,313 @@ def hf_api_request(
211
  except json.JSONDecodeError:
212
  payload = raw.decode("utf-8", errors="replace")
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  if isinstance(payload, list):
215
  limit = max_results if max_results is not None else _max_results_from_env()
216
  start = max(offset or 0, 0)
217
  end = start + max(limit, 0)
218
  payload = payload[start:end]
219
 
220
- return {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  "url": url,
222
  "status": status_code,
223
  "data": payload,
 
224
  }
 
 
 
 
46
  r"^/collections/[^/]+/items$",
47
  # Auth check
48
  r"^/(models|datasets|spaces)/[^/]+/[^/]+/auth-check$",
49
+ # Recent activity feed (undocumented)
50
+ r"^/recent-activity$",
51
  ]
52
 
53
  _COMPILED_PATTERNS: list[re.Pattern[str]] = [
 
153
  return url
154
 
155
 
156
+ def _request_once(
157
+ *,
158
+ url: str,
159
+ method_upper: str,
160
+ json_body: dict[str, Any] | None,
161
+ ) -> tuple[int, Any]:
162
+ headers = {"Accept": "application/json"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  token = _load_token()
164
  if token:
165
  headers["Authorization"] = f"Bearer {token}"
 
186
  except json.JSONDecodeError:
187
  payload = raw.decode("utf-8", errors="replace")
188
 
189
+ return status_code, payload
190
+
191
+
192
+ def _get_nested_value(obj: Any, path: str) -> Any:
193
+ cur = obj
194
+ for part in [p for p in path.split(".") if p]:
195
+ if isinstance(cur, dict):
196
+ if part not in cur:
197
+ return None
198
+ cur = cur[part]
199
+ elif isinstance(cur, list):
200
+ try:
201
+ idx = int(part)
202
+ except ValueError:
203
+ return None
204
+ if idx < 0 or idx >= len(cur):
205
+ return None
206
+ cur = cur[idx]
207
+ else:
208
+ return None
209
+ return cur
210
+
211
+
212
+ def _set_nested_value(obj: Any, path: str, value: Any) -> Any:
213
+ if not path:
214
+ return value
215
+ if not isinstance(obj, dict):
216
+ return obj
217
+
218
+ parts = [p for p in path.split(".") if p]
219
+ if not parts:
220
+ return obj
221
+
222
+ cur: Any = obj
223
+ for part in parts[:-1]:
224
+ if not isinstance(cur, dict):
225
+ return obj
226
+ nxt = cur.get(part)
227
+ if not isinstance(nxt, dict):
228
+ nxt = {}
229
+ cur[part] = nxt
230
+ cur = nxt
231
+
232
+ if isinstance(cur, dict):
233
+ cur[parts[-1]] = value
234
+ return obj
235
+
236
+
237
def _apply_local_refine(
    payload: Any,
    *,
    data_path: str | None,
    contains: str | None,
    where: dict[str, Any] | None,
    fields: list[str] | None,
    sort_by: str | None,
    sort_desc: bool,
    max_items: int | None,
    offset: int,
) -> tuple[Any, dict[str, Any]]:
    """Filter, sort, project and slice a list found in ``payload``.

    The target list is ``payload`` itself when it is a list; for a dict it is
    the list at ``data_path`` or, when no path is given, the list under the
    ``"recentActivity"`` key. Steps run in this order: ``where`` exact match,
    ``contains`` substring match, sort, ``fields`` projection, then
    ``offset``/``max_items`` slicing.

    Args:
        payload: Decoded response body.
        data_path: Dot path to the list inside a dict payload.
        contains: Case-insensitive substring looked up in each item's JSON
            serialization.
        where: Mapping of dot-path key to the exact value an item must have.
        fields: Dot paths to keep per item; non-dict items are dropped when
            projecting.
        sort_by: Dot path of the sort key.
        sort_desc: Sort in descending order.
        max_items: Maximum number of items to keep after filtering.
        offset: Number of items to skip after filtering (negative is
            treated as 0).

    Returns:
        ``(refined_payload, meta)``. When no list can be targeted the payload
        is returned unchanged with ``meta["refined"]`` set to ``False`` and a
        ``reason``.
    """
    # Decide which list to refine
    root_mode = "other"
    target_path = data_path

    if isinstance(payload, list):
        list_data = payload
        root_mode = "list"
    elif isinstance(payload, dict):
        if target_path:
            maybe_list = _get_nested_value(payload, target_path)
            list_data = maybe_list if isinstance(maybe_list, list) else None
        elif isinstance(payload.get("recentActivity"), list):
            target_path = "recentActivity"
            list_data = payload.get("recentActivity")
        else:
            list_data = None
        root_mode = "dict"
    else:
        return payload, {"refined": False, "reason": "non-json-or-scalar"}

    if list_data is None:
        return payload, {"refined": False, "reason": "no-list-target"}

    original_count = len(list_data)
    items = list_data

    if where:
        def _matches_where(item: Any) -> bool:
            if not isinstance(item, dict):
                return False
            return all(
                _get_nested_value(item, key) == expected
                for key, expected in where.items()
            )

        items = [item for item in items if _matches_where(item)]

    if contains:
        needle = contains.lower()
        items = [
            item
            for item in items
            if needle in json.dumps(item, ensure_ascii=False).lower()
        ]

    if sort_by:
        keyed = [
            (_get_nested_value(item, sort_by) if isinstance(item, dict) else None, item)
            for item in items
        ]
        present = [pair for pair in keyed if pair[0] is not None]
        missing = [item for value, item in keyed if value is None]
        # Items without a sort value stay at the end in BOTH directions; sorting
        # them together with reverse=True would float them to the front.
        present.sort(key=lambda pair: pair[0], reverse=sort_desc)
        items = [item for _, item in present] + missing

    if fields:
        items = [
            {field: _get_nested_value(item, field) for field in fields}
            for item in items
            if isinstance(item, dict)
        ]

    start = max(offset, 0)
    if max_items is not None:
        end = start + max(max_items, 0)
        items = items[start:end]
    elif start:
        items = items[start:]

    if root_mode == "list":
        refined_payload: Any = items
        effective_path = "<root>"
    else:
        effective_path = target_path or "recentActivity"
        # Shallow copy so the top-level dict of the input is not rebound.
        refined_payload = dict(payload)
        _set_nested_value(refined_payload, effective_path, items)

    refine_meta = {
        "refined": True,
        "data_path": effective_path,
        "original_count": original_count,
        "returned_count": len(items),
    }
    return refined_payload, refine_meta
335
+
336
+
337
def hf_api_request(
    endpoint: str,
    method: str = "GET",
    params: dict[str, Any] | None = None,
    json_body: dict[str, Any] | None = None,
    max_results: int | None = None,
    offset: int | None = None,
    auto_paginate: bool | None = False,
    max_pages: int | None = 1,
    data_path: str | None = None,
    contains: str | None = None,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
    sort_by: str | None = None,
    sort_desc: bool | None = False,
    max_items: int | None = None,
) -> dict[str, Any]:
    """
    Primary Hub community API tool (GET/POST only).

    When to use:
    - User/org intelligence: /users/*, /organizations/*
    - Collaboration flows: /{repo_type}s/{repo_id}/discussions and discussion details
    - Gated access workflows: user-access-request endpoints
    - Collections list/get/create/add-item
    - Recent activity feed via /recent-activity

    When NOT to use:
    - Model/dataset semantic search/ranking
    - PATCH/DELETE operations (unsupported)

    Intent-to-parameter guidance:
    - "latest" or "recent": add params limit and sort_by time if needed
    - "top N": use max_items or max_results
    - "mentioning X": use contains
    - "only fields A/B": use fields projection
    - Cursor feeds: use auto_paginate=True with max_pages guard

    Args:
        endpoint: Endpoint path relative to /api (allowlisted).
        method: GET or POST only.
        params: Query parameters.
        json_body: JSON body for POST.
        max_results: Client-side list cap.
        offset: Client-side list offset (applied once, even when refining).
        auto_paginate: Follow cursor-based pages for GET responses.
        max_pages: Max pages when auto_paginate=True.
        data_path: Dot path to target list (e.g. recentActivity).
        contains: Case-insensitive text match on serialized items.
        where: Exact-match dict using dot notation keys.
        fields: Return only selected fields (dot notation supported).
        sort_by: Dot-notation sort key.
        sort_desc: Descending sort flag.
        max_items: Post-filter cap for returned list.

    Returns:
        A dict containing request URL, HTTP status, response data, and refine/pagination metadata.

    Raises:
        ValueError: If the method is not GET/POST, a GET carries json_body,
            auto_paginate is used with a non-GET method, or max_pages < 1.
    """
    method_upper = method.upper()

    # Tolerate explicit nulls from LLM/tool-calling wrappers
    auto_paginate = bool(auto_paginate)
    sort_desc = bool(sort_desc)
    if max_pages is None:
        max_pages = 1

    if method_upper not in {"GET", "POST"}:
        raise ValueError("Only GET and POST are allowed for hf_api_request.")

    if method_upper == "GET" and json_body is not None:
        raise ValueError("GET requests do not accept json_body.")

    if auto_paginate and method_upper != "GET":
        raise ValueError("auto_paginate is only supported for GET requests.")

    if max_pages < 1:
        raise ValueError("max_pages must be >= 1.")

    req_params = dict(params or {})
    url = _build_url(endpoint, req_params)
    status_code, payload = _request_once(
        url=url,
        method_upper=method_upper,
        json_body=json_body,
    )

    pages_fetched = 1

    # Cursor pagination path (e.g. /recent-activity)
    if auto_paginate and isinstance(payload, dict):
        list_key: str | None = None
        if data_path:
            maybe_list = _get_nested_value(payload, data_path)
            if isinstance(maybe_list, list):
                list_key = data_path
        elif isinstance(payload.get("recentActivity"), list):
            list_key = "recentActivity"

        cursor = payload.get("cursor")
        while list_key and cursor and pages_fetched < max_pages:
            req_params["cursor"] = cursor
            page_url = _build_url(endpoint, req_params)
            _, next_payload = _request_once(
                url=page_url,
                method_upper="GET",
                json_body=None,
            )

            if not isinstance(next_payload, dict):
                break

            current_items = _get_nested_value(payload, list_key)
            next_items = _get_nested_value(next_payload, list_key)
            if not isinstance(current_items, list) or not isinstance(next_items, list):
                break

            # Merge the new page into the first payload and advance the cursor.
            _set_nested_value(payload, list_key, current_items + next_items)
            cursor = next_payload.get("cursor")
            payload["cursor"] = cursor
            pages_fetched += 1

    requested_offset = max(offset or 0, 0)
    refine_offset = requested_offset

    # Legacy list slicing path
    if isinstance(payload, list):
        limit = max_results if max_results is not None else _max_results_from_env()
        payload = payload[requested_offset : requested_offset + max(limit, 0)]
        # The offset has been consumed by the slice above; passing it to the
        # refine step as well would skip items a second time.
        refine_offset = 0

    # Local refine path
    refine_requested = any(
        option is not None
        for option in (data_path, contains, where, fields, sort_by, max_items)
    )

    refine_meta: dict[str, Any] | None = None
    if refine_requested:
        payload, refine_meta = _apply_local_refine(
            payload,
            data_path=data_path,
            contains=contains,
            where=where,
            fields=fields,
            sort_by=sort_by,
            sort_desc=sort_desc,
            max_items=max_items,
            offset=refine_offset,
        )

    result = {
        "url": url,
        "status": status_code,
        "data": payload,
        "pages_fetched": pages_fetched,
    }
    if refine_meta is not None:
        result["refine"] = refine_meta
    return result