Deploy structured hf_api_request docstring
Browse files- hf_api_tool.py +309 -35
hf_api_tool.py
CHANGED
|
@@ -46,6 +46,8 @@ ALLOWED_ENDPOINT_PATTERNS: list[str] = [
|
|
| 46 |
r"^/collections/[^/]+/items$",
|
| 47 |
# Auth check
|
| 48 |
r"^/(models|datasets|spaces)/[^/]+/[^/]+/auth-check$",
|
|
|
|
|
|
|
| 49 |
]
|
| 50 |
|
| 51 |
_COMPILED_PATTERNS: list[re.Pattern[str]] = [
|
|
@@ -151,40 +153,13 @@ def _build_url(endpoint: str, params: dict[str, Any] | None) -> str:
|
|
| 151 |
return url
|
| 152 |
|
| 153 |
|
| 154 |
-
def
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
json_body: dict[str, Any] | None
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
) -> dict[str, Any]:
|
| 162 |
-
"""
|
| 163 |
-
Call the Hugging Face Hub API (GET/POST only).
|
| 164 |
-
|
| 165 |
-
Args:
|
| 166 |
-
endpoint: API endpoint relative to /api (e.g. "/whoami-v2").
|
| 167 |
-
method: HTTP method (GET or POST).
|
| 168 |
-
params: Optional query parameters.
|
| 169 |
-
json_body: Optional JSON payload for POST requests.
|
| 170 |
-
max_results: Max results when response is a list (defaults to HF_MAX_RESULTS).
|
| 171 |
-
offset: Client-side offset when response is a list (defaults to 0).
|
| 172 |
-
|
| 173 |
-
Returns:
|
| 174 |
-
A dict with the response data and request metadata.
|
| 175 |
-
"""
|
| 176 |
-
method_upper = method.upper()
|
| 177 |
-
if method_upper not in {"GET", "POST"}:
|
| 178 |
-
raise ValueError("Only GET and POST are allowed for hf_api_request.")
|
| 179 |
-
|
| 180 |
-
if method_upper == "GET" and json_body is not None:
|
| 181 |
-
raise ValueError("GET requests do not accept json_body.")
|
| 182 |
-
|
| 183 |
-
url = _build_url(endpoint, params)
|
| 184 |
-
|
| 185 |
-
headers = {
|
| 186 |
-
"Accept": "application/json",
|
| 187 |
-
}
|
| 188 |
token = _load_token()
|
| 189 |
if token:
|
| 190 |
headers["Authorization"] = f"Bearer {token}"
|
|
@@ -211,14 +186,313 @@ def hf_api_request(
|
|
| 211 |
except json.JSONDecodeError:
|
| 212 |
payload = raw.decode("utf-8", errors="replace")
|
| 213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
if isinstance(payload, list):
|
| 215 |
limit = max_results if max_results is not None else _max_results_from_env()
|
| 216 |
start = max(offset or 0, 0)
|
| 217 |
end = start + max(limit, 0)
|
| 218 |
payload = payload[start:end]
|
| 219 |
|
| 220 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
"url": url,
|
| 222 |
"status": status_code,
|
| 223 |
"data": payload,
|
|
|
|
| 224 |
}
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
r"^/collections/[^/]+/items$",
|
| 47 |
# Auth check
|
| 48 |
r"^/(models|datasets|spaces)/[^/]+/[^/]+/auth-check$",
|
| 49 |
+
# Recent activity feed (undocumented)
|
| 50 |
+
r"^/recent-activity$",
|
| 51 |
]
|
| 52 |
|
| 53 |
_COMPILED_PATTERNS: list[re.Pattern[str]] = [
|
|
|
|
| 153 |
return url
|
| 154 |
|
| 155 |
|
| 156 |
+
def _request_once(
|
| 157 |
+
*,
|
| 158 |
+
url: str,
|
| 159 |
+
method_upper: str,
|
| 160 |
+
json_body: dict[str, Any] | None,
|
| 161 |
+
) -> tuple[int, Any]:
|
| 162 |
+
headers = {"Accept": "application/json"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
token = _load_token()
|
| 164 |
if token:
|
| 165 |
headers["Authorization"] = f"Bearer {token}"
|
|
|
|
| 186 |
except json.JSONDecodeError:
|
| 187 |
payload = raw.decode("utf-8", errors="replace")
|
| 188 |
|
| 189 |
+
return status_code, payload
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def _get_nested_value(obj: Any, path: str) -> Any:
|
| 193 |
+
cur = obj
|
| 194 |
+
for part in [p for p in path.split(".") if p]:
|
| 195 |
+
if isinstance(cur, dict):
|
| 196 |
+
if part not in cur:
|
| 197 |
+
return None
|
| 198 |
+
cur = cur[part]
|
| 199 |
+
elif isinstance(cur, list):
|
| 200 |
+
try:
|
| 201 |
+
idx = int(part)
|
| 202 |
+
except ValueError:
|
| 203 |
+
return None
|
| 204 |
+
if idx < 0 or idx >= len(cur):
|
| 205 |
+
return None
|
| 206 |
+
cur = cur[idx]
|
| 207 |
+
else:
|
| 208 |
+
return None
|
| 209 |
+
return cur
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def _set_nested_value(obj: Any, path: str, value: Any) -> Any:
|
| 213 |
+
if not path:
|
| 214 |
+
return value
|
| 215 |
+
if not isinstance(obj, dict):
|
| 216 |
+
return obj
|
| 217 |
+
|
| 218 |
+
parts = [p for p in path.split(".") if p]
|
| 219 |
+
if not parts:
|
| 220 |
+
return obj
|
| 221 |
+
|
| 222 |
+
cur: Any = obj
|
| 223 |
+
for part in parts[:-1]:
|
| 224 |
+
if not isinstance(cur, dict):
|
| 225 |
+
return obj
|
| 226 |
+
nxt = cur.get(part)
|
| 227 |
+
if not isinstance(nxt, dict):
|
| 228 |
+
nxt = {}
|
| 229 |
+
cur[part] = nxt
|
| 230 |
+
cur = nxt
|
| 231 |
+
|
| 232 |
+
if isinstance(cur, dict):
|
| 233 |
+
cur[parts[-1]] = value
|
| 234 |
+
return obj
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def _apply_local_refine(
|
| 238 |
+
payload: Any,
|
| 239 |
+
*,
|
| 240 |
+
data_path: str | None,
|
| 241 |
+
contains: str | None,
|
| 242 |
+
where: dict[str, Any] | None,
|
| 243 |
+
fields: list[str] | None,
|
| 244 |
+
sort_by: str | None,
|
| 245 |
+
sort_desc: bool,
|
| 246 |
+
max_items: int | None,
|
| 247 |
+
offset: int,
|
| 248 |
+
) -> tuple[Any, dict[str, Any]]:
|
| 249 |
+
# Decide which list to refine
|
| 250 |
+
root_mode = "other"
|
| 251 |
+
target_path = data_path
|
| 252 |
+
|
| 253 |
+
if isinstance(payload, list):
|
| 254 |
+
list_data = payload
|
| 255 |
+
root_mode = "list"
|
| 256 |
+
elif isinstance(payload, dict):
|
| 257 |
+
if target_path:
|
| 258 |
+
maybe_list = _get_nested_value(payload, target_path)
|
| 259 |
+
list_data = maybe_list if isinstance(maybe_list, list) else None
|
| 260 |
+
elif isinstance(payload.get("recentActivity"), list):
|
| 261 |
+
target_path = "recentActivity"
|
| 262 |
+
list_data = payload.get("recentActivity")
|
| 263 |
+
else:
|
| 264 |
+
list_data = None
|
| 265 |
+
root_mode = "dict"
|
| 266 |
+
else:
|
| 267 |
+
return payload, {"refined": False, "reason": "non-json-or-scalar"}
|
| 268 |
+
|
| 269 |
+
if list_data is None:
|
| 270 |
+
return payload, {"refined": False, "reason": "no-list-target"}
|
| 271 |
+
|
| 272 |
+
original_count = len(list_data)
|
| 273 |
+
items = list_data
|
| 274 |
+
|
| 275 |
+
if where:
|
| 276 |
+
def _matches_where(item: Any) -> bool:
|
| 277 |
+
if not isinstance(item, dict):
|
| 278 |
+
return False
|
| 279 |
+
for key, expected in where.items():
|
| 280 |
+
actual = _get_nested_value(item, key)
|
| 281 |
+
if actual != expected:
|
| 282 |
+
return False
|
| 283 |
+
return True
|
| 284 |
+
|
| 285 |
+
items = [item for item in items if _matches_where(item)]
|
| 286 |
+
|
| 287 |
+
if contains:
|
| 288 |
+
needle = contains.lower()
|
| 289 |
+
items = [
|
| 290 |
+
item
|
| 291 |
+
for item in items
|
| 292 |
+
if needle in json.dumps(item, ensure_ascii=False).lower()
|
| 293 |
+
]
|
| 294 |
+
|
| 295 |
+
if sort_by:
|
| 296 |
+
def _sort_key(item: Any) -> Any:
|
| 297 |
+
value = _get_nested_value(item, sort_by) if isinstance(item, dict) else None
|
| 298 |
+
return (value is None, value)
|
| 299 |
+
|
| 300 |
+
items = sorted(items, key=_sort_key, reverse=sort_desc)
|
| 301 |
+
|
| 302 |
+
if fields:
|
| 303 |
+
projected: list[dict[str, Any]] = []
|
| 304 |
+
for item in items:
|
| 305 |
+
if not isinstance(item, dict):
|
| 306 |
+
continue
|
| 307 |
+
row: dict[str, Any] = {}
|
| 308 |
+
for field in fields:
|
| 309 |
+
row[field] = _get_nested_value(item, field)
|
| 310 |
+
projected.append(row)
|
| 311 |
+
items = projected
|
| 312 |
+
|
| 313 |
+
start = max(offset, 0)
|
| 314 |
+
if max_items is not None:
|
| 315 |
+
end = start + max(max_items, 0)
|
| 316 |
+
items = items[start:end]
|
| 317 |
+
elif start:
|
| 318 |
+
items = items[start:]
|
| 319 |
+
|
| 320 |
+
if root_mode == "list":
|
| 321 |
+
refined_payload: Any = items
|
| 322 |
+
effective_path = "<root>"
|
| 323 |
+
else:
|
| 324 |
+
effective_path = target_path or "recentActivity"
|
| 325 |
+
refined_payload = dict(payload)
|
| 326 |
+
_set_nested_value(refined_payload, effective_path, items)
|
| 327 |
+
|
| 328 |
+
refine_meta = {
|
| 329 |
+
"refined": True,
|
| 330 |
+
"data_path": effective_path,
|
| 331 |
+
"original_count": original_count,
|
| 332 |
+
"returned_count": len(items),
|
| 333 |
+
}
|
| 334 |
+
return refined_payload, refine_meta
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def hf_api_request(
    endpoint: str,
    method: str = "GET",
    params: dict[str, Any] | None = None,
    json_body: dict[str, Any] | None = None,
    max_results: int | None = None,
    offset: int | None = None,
    auto_paginate: bool | None = False,
    max_pages: int | None = 1,
    data_path: str | None = None,
    contains: str | None = None,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
    sort_by: str | None = None,
    sort_desc: bool | None = False,
    max_items: int | None = None,
) -> dict[str, Any]:
    """
    Primary Hub community API tool (GET/POST only).

    When to use:
    - User/org intelligence: /users/*, /organizations/*
    - Collaboration flows: /{repo_type}s/{repo_id}/discussions and discussion details
    - Gated access workflows: user-access-request endpoints
    - Collections list/get/create/add-item
    - Recent activity feed via /recent-activity

    When NOT to use:
    - Model/dataset semantic search/ranking
    - PATCH/DELETE operations (unsupported)

    Intent-to-parameter guidance:
    - "latest" or "recent": add params limit and sort_by time if needed
    - "top N": use max_items or max_results
    - "mentioning X": use contains
    - "only fields A/B": use fields projection
    - Cursor feeds: use auto_paginate=True with max_pages guard

    Args:
        endpoint: Endpoint path relative to /api (allowlisted).
        method: GET or POST only.
        params: Query parameters.
        json_body: JSON body for POST.
        max_results: Client-side list cap.
        offset: Client-side list offset. Applied exactly once: before
            filtering when the response is a root-level list, after
            filtering otherwise.
        auto_paginate: Follow cursor-based pages for GET responses.
        max_pages: Max pages when auto_paginate=True.
        data_path: Dot path to target list (e.g. recentActivity).
        contains: Case-insensitive text match on serialized items.
        where: Exact-match dict using dot notation keys.
        fields: Return only selected fields (dot notation supported).
        sort_by: Dot-notation sort key.
        sort_desc: Descending sort flag.
        max_items: Post-filter cap for returned list.

    Returns:
        A dict containing request URL, HTTP status, response data, and refine/pagination metadata.

    Raises:
        ValueError: If the method is not GET/POST, a GET carries json_body,
            auto_paginate is used with a non-GET method, or max_pages < 1.
    """
    method_upper = method.upper()

    # Tolerate explicit nulls from LLM/tool-calling wrappers
    auto_paginate = bool(auto_paginate)
    sort_desc = bool(sort_desc)
    if max_pages is None:
        max_pages = 1

    if method_upper not in {"GET", "POST"}:
        raise ValueError("Only GET and POST are allowed for hf_api_request.")

    if method_upper == "GET" and json_body is not None:
        raise ValueError("GET requests do not accept json_body.")

    if auto_paginate and method_upper != "GET":
        raise ValueError("auto_paginate is only supported for GET requests.")

    if max_pages < 1:
        raise ValueError("max_pages must be >= 1.")

    req_params = dict(params or {})
    url = _build_url(endpoint, req_params)
    status_code, payload = _request_once(
        url=url,
        method_upper=method_upper,
        json_body=json_body,
    )

    pages_fetched = 1

    # Cursor pagination path (e.g. /recent-activity)
    if auto_paginate and isinstance(payload, dict):
        list_key: str | None = None
        if data_path:
            maybe_list = _get_nested_value(payload, data_path)
            if isinstance(maybe_list, list):
                list_key = data_path
        elif isinstance(payload.get("recentActivity"), list):
            list_key = "recentActivity"

        cursor = payload.get("cursor")
        while list_key and cursor and pages_fetched < max_pages:
            req_params["cursor"] = cursor
            page_url = _build_url(endpoint, req_params)
            _, next_payload = _request_once(
                url=page_url,
                method_upper="GET",
                json_body=None,
            )

            if not isinstance(next_payload, dict):
                break

            current_items = _get_nested_value(payload, list_key)
            next_items = _get_nested_value(next_payload, list_key)
            if not isinstance(current_items, list) or not isinstance(next_items, list):
                break

            # Merge the new page into the first payload and carry the
            # latest cursor forward so callers can resume from it.
            _set_nested_value(payload, list_key, current_items + next_items)
            cursor = next_payload.get("cursor")
            payload["cursor"] = cursor
            pages_fetched += 1

    start = max(offset or 0, 0)
    offset_applied = False

    # Legacy list slicing path
    if isinstance(payload, list):
        limit = max_results if max_results is not None else _max_results_from_env()
        payload = payload[start : start + max(limit, 0)]
        offset_applied = True

    # Local refine path
    refine_requested = any(
        option is not None
        for option in (data_path, contains, where, fields, sort_by, max_items)
    )

    refine_meta: dict[str, Any] | None = None
    if refine_requested:
        payload, refine_meta = _apply_local_refine(
            payload,
            data_path=data_path,
            contains=contains,
            where=where,
            fields=fields,
            sort_by=sort_by,
            sort_desc=sort_desc,
            max_items=max_items,
            # The legacy slice above already consumed the offset for
            # root-level lists; passing it again would skip items twice.
            offset=0 if offset_applied else start,
        )

    result = {
        "url": url,
        "status": status_code,
        "data": payload,
        "pages_fetched": pages_fetched,
    }
    if refine_meta is not None:
        result["refine"] = refine_meta
    return result
|