Spaces:
Running
Running
Deploy committed Monty runtime refactor
Browse files- hf-hub-query.md +2 -2
- monty_api/aliases.py +0 -88
- monty_api/helpers/activity.py +28 -12
- monty_api/helpers/profiles.py +92 -51
- monty_api/http_runtime.py +88 -16
- monty_api/query_entrypoints.py +27 -0
- monty_api/runtime_context.py +4 -0
- monty_api/runtime_envelopes.py +35 -36
- monty_api/runtime_filtering.py +65 -32
- monty_api/tool_entrypoints.py +35 -1
- monty_api/validation.py +2 -2
- tool_entrypoints.py +35 -1
hf-hub-query.md
CHANGED
|
@@ -4,7 +4,7 @@ name: hf_hub_query
|
|
| 4 |
model: hf.openai/gpt-oss-120b:sambanova
|
| 5 |
use_history: false
|
| 6 |
default: true
|
| 7 |
-
description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound
|
| 8 |
shell: false
|
| 9 |
skills: []
|
| 10 |
function_tools:
|
|
@@ -32,7 +32,7 @@ The user must never see your generated Python unless they explicitly ask for deb
|
|
| 32 |
- The return value of `solve(...)` is the user-facing payload.
|
| 33 |
- Return a dict/list when JSON is appropriate; return a string/number/bool only when that scalar is the intended payload.
|
| 34 |
- For composed structured outputs that include your own coverage metadata, always use the exact top-level keys `results` and `coverage` unless the user explicitly asked for different key names.
|
| 35 |
-
-
|
| 36 |
- Runtime will wrap the `solve(...)` return value under `result` and attach runtime information under `meta`.
|
| 37 |
- When helper-owned coverage metadata matters, prefer returning the helper envelope directly.
|
| 38 |
- Do **not** create your own transport wrapper such as `{result: ..., meta: ...}` inside `solve(...)`.
|
|
|
|
| 4 |
model: hf.openai/gpt-oss-120b:sambanova
|
| 5 |
use_history: false
|
| 6 |
default: true
|
| 7 |
+
description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound limit, scan_limit, and max_pages for brevity or broader coverage, and the tool can also be asked about its supported helpers, canonical fields, defaults, and coverage behavior."
|
| 8 |
shell: false
|
| 9 |
skills: []
|
| 10 |
function_tools:
|
|
|
|
| 32 |
- The return value of `solve(...)` is the user-facing payload.
|
| 33 |
- Return a dict/list when JSON is appropriate; return a string/number/bool only when that scalar is the intended payload.
|
| 34 |
- For composed structured outputs that include your own coverage metadata, always use the exact top-level keys `results` and `coverage` unless the user explicitly asked for different key names.
|
| 35 |
+
- Prefer returning outputs directly unless post-processing is required. Do **NOT** rename fields unless asked specifically.
|
| 36 |
- Runtime will wrap the `solve(...)` return value under `result` and attach runtime information under `meta`.
|
| 37 |
- When helper-owned coverage metadata matters, prefer returning the helper envelope directly.
|
| 38 |
- Do **not** create your own transport wrapper such as `{result: ..., meta: ...}` inside `solve(...)`.
|
monty_api/aliases.py
CHANGED
|
@@ -29,91 +29,3 @@ REPO_SORT_KEYS: dict[str, set[str]] = {
|
|
| 29 |
"trending_score",
|
| 30 |
},
|
| 31 |
}
|
| 32 |
-
|
| 33 |
-
# Alias policy:
|
| 34 |
-
# - canonical names stay canonical
|
| 35 |
-
# - support a small compatibility set for observed prompt/output variants
|
| 36 |
-
# - do not add speculative synonyms unless they appear in prompts, evals, or
|
| 37 |
-
# upstream payloads we already normalize
|
| 38 |
-
SORT_KEY_ALIASES: dict[str, str] = {
|
| 39 |
-
"createdat": "created_at",
|
| 40 |
-
"created_at": "created_at",
|
| 41 |
-
"created-at": "created_at",
|
| 42 |
-
"downloads": "downloads",
|
| 43 |
-
"likes": "likes",
|
| 44 |
-
"lastmodified": "last_modified",
|
| 45 |
-
"last_modified": "last_modified",
|
| 46 |
-
"last-modified": "last_modified",
|
| 47 |
-
"trendingscore": "trending_score",
|
| 48 |
-
"trending_score": "trending_score",
|
| 49 |
-
"trending-score": "trending_score",
|
| 50 |
-
"trending": "trending_score",
|
| 51 |
-
}
|
| 52 |
-
|
| 53 |
-
USER_FIELD_ALIASES: dict[str, str] = {
|
| 54 |
-
"login": "username",
|
| 55 |
-
"user": "username",
|
| 56 |
-
"handle": "username",
|
| 57 |
-
"name": "fullname",
|
| 58 |
-
"full_name": "fullname",
|
| 59 |
-
"is_pro": "isPro",
|
| 60 |
-
"pro": "isPro",
|
| 61 |
-
}
|
| 62 |
-
|
| 63 |
-
ACTOR_FIELD_ALIASES: dict[str, str] = {
|
| 64 |
-
**USER_FIELD_ALIASES,
|
| 65 |
-
"entity_type": "type",
|
| 66 |
-
"user_type": "type",
|
| 67 |
-
}
|
| 68 |
-
|
| 69 |
-
REPO_FIELD_ALIASES: dict[str, str] = {
|
| 70 |
-
"repoid": "repo_id",
|
| 71 |
-
"repotype": "repo_type",
|
| 72 |
-
"repourl": "repo_url",
|
| 73 |
-
"createdat": "created_at",
|
| 74 |
-
"lastmodified": "last_modified",
|
| 75 |
-
"pipelinetag": "pipeline_tag",
|
| 76 |
-
"numparams": "num_params",
|
| 77 |
-
"trendingrank": "trending_rank",
|
| 78 |
-
"trendingscore": "trending_score",
|
| 79 |
-
"libraryname": "library_name",
|
| 80 |
-
"paperswithcodeid": "paperswithcode_id",
|
| 81 |
-
}
|
| 82 |
-
|
| 83 |
-
COLLECTION_FIELD_ALIASES: dict[str, str] = {
|
| 84 |
-
"collectionid": "collection_id",
|
| 85 |
-
"lastupdated": "last_updated",
|
| 86 |
-
"ownertype": "owner_type",
|
| 87 |
-
"itemcount": "item_count",
|
| 88 |
-
"author": "owner",
|
| 89 |
-
}
|
| 90 |
-
|
| 91 |
-
DAILY_PAPER_FIELD_ALIASES: dict[str, str] = {
|
| 92 |
-
"paperid": "paper_id",
|
| 93 |
-
"publishedat": "published_at",
|
| 94 |
-
"submittedondailyat": "submitted_on_daily_at",
|
| 95 |
-
"submittedby": "submitted_by",
|
| 96 |
-
"discussionid": "discussion_id",
|
| 97 |
-
"githubrepo": "github_repo_url",
|
| 98 |
-
"githubstars": "github_stars",
|
| 99 |
-
"projectpage": "project_page_url",
|
| 100 |
-
"numcomments": "num_comments",
|
| 101 |
-
"isauthorparticipating": "is_author_participating",
|
| 102 |
-
"repoid": "repo_id",
|
| 103 |
-
}
|
| 104 |
-
|
| 105 |
-
USER_LIKES_FIELD_ALIASES: dict[str, str] = {
|
| 106 |
-
"likedat": "liked_at",
|
| 107 |
-
"repoid": "repo_id",
|
| 108 |
-
"repotype": "repo_type",
|
| 109 |
-
"repoauthor": "repo_author",
|
| 110 |
-
"repolikes": "repo_likes",
|
| 111 |
-
"repodownloads": "repo_downloads",
|
| 112 |
-
}
|
| 113 |
-
|
| 114 |
-
ACTIVITY_FIELD_ALIASES: dict[str, str] = {
|
| 115 |
-
"time": "timestamp",
|
| 116 |
-
"type": "event_type",
|
| 117 |
-
"repoid": "repo_id",
|
| 118 |
-
"repotype": "repo_type",
|
| 119 |
-
}
|
|
|
|
| 29 |
"trending_score",
|
| 30 |
},
|
| 31 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
monty_api/helpers/activity.py
CHANGED
|
@@ -4,8 +4,8 @@ from __future__ import annotations
|
|
| 4 |
from functools import partial
|
| 5 |
from typing import Any, Callable
|
| 6 |
|
| 7 |
-
from ..aliases import ACTIVITY_FIELD_ALIASES
|
| 8 |
from ..constants import (
|
|
|
|
| 9 |
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 10 |
RECENT_ACTIVITY_PAGE_SIZE,
|
| 11 |
RECENT_ACTIVITY_SCAN_MAX_PAGES,
|
|
@@ -19,7 +19,7 @@ async def hf_recent_activity(
|
|
| 19 |
entity: str | None = None,
|
| 20 |
activity_types: list[str] | None = None,
|
| 21 |
repo_types: list[str] | None = None,
|
| 22 |
-
|
| 23 |
max_pages: int | None = None,
|
| 24 |
start_cursor: str | None = None,
|
| 25 |
count_only: bool = False,
|
|
@@ -27,7 +27,7 @@ async def hf_recent_activity(
|
|
| 27 |
fields: list[str] | None = None,
|
| 28 |
) -> dict[str, Any]:
|
| 29 |
start_calls = ctx.call_count["n"]
|
| 30 |
-
|
| 31 |
page_cap = ctx._policy_int(
|
| 32 |
"hf_recent_activity", "page_limit", RECENT_ACTIVITY_PAGE_SIZE
|
| 33 |
)
|
|
@@ -56,12 +56,12 @@ async def hf_recent_activity(
|
|
| 56 |
error="entity is required",
|
| 57 |
)
|
| 58 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 59 |
-
|
| 60 |
count_only=count_only,
|
| 61 |
-
|
| 62 |
-
|
| 63 |
)
|
| 64 |
-
|
| 65 |
page_lim = page_cap
|
| 66 |
pages_lim = ctx._clamp_int(
|
| 67 |
requested_max_pages, default=pages_cap, minimum=1, maximum=pages_cap
|
|
@@ -85,8 +85,17 @@ async def hf_recent_activity(
|
|
| 85 |
pages = 0
|
| 86 |
exhausted_feed = False
|
| 87 |
stopped_for_budget = False
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
if ctx._budget_remaining() <= 0:
|
| 91 |
stopped_for_budget = True
|
| 92 |
break
|
|
@@ -147,15 +156,22 @@ async def hf_recent_activity(
|
|
| 147 |
if not ctx._item_matches_where(item, normalized_where):
|
| 148 |
continue
|
| 149 |
matched += 1
|
| 150 |
-
if len(items) <
|
| 151 |
items.append(item)
|
| 152 |
if not next_cursor:
|
| 153 |
exhausted_feed = True
|
| 154 |
break
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
exact_count = exhausted_feed and (not stopped_for_budget)
|
| 157 |
sample_complete = (
|
| 158 |
-
exact_count and
|
| 159 |
)
|
| 160 |
page_limit_hit = (
|
| 161 |
next_cursor is not None and pages >= pages_lim and (not exhausted_feed)
|
|
|
|
| 4 |
from functools import partial
|
| 5 |
from typing import Any, Callable
|
| 6 |
|
|
|
|
| 7 |
from ..constants import (
|
| 8 |
+
ACTIVITY_CANONICAL_FIELDS,
|
| 9 |
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 10 |
RECENT_ACTIVITY_PAGE_SIZE,
|
| 11 |
RECENT_ACTIVITY_SCAN_MAX_PAGES,
|
|
|
|
| 19 |
entity: str | None = None,
|
| 20 |
activity_types: list[str] | None = None,
|
| 21 |
repo_types: list[str] | None = None,
|
| 22 |
+
limit: int | None = None,
|
| 23 |
max_pages: int | None = None,
|
| 24 |
start_cursor: str | None = None,
|
| 25 |
count_only: bool = False,
|
|
|
|
| 27 |
fields: list[str] | None = None,
|
| 28 |
) -> dict[str, Any]:
|
| 29 |
start_calls = ctx.call_count["n"]
|
| 30 |
+
default_limit = ctx._policy_int("hf_recent_activity", "default_limit", 100)
|
| 31 |
page_cap = ctx._policy_int(
|
| 32 |
"hf_recent_activity", "page_limit", RECENT_ACTIVITY_PAGE_SIZE
|
| 33 |
)
|
|
|
|
| 56 |
error="entity is required",
|
| 57 |
)
|
| 58 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 59 |
+
limit=limit,
|
| 60 |
count_only=count_only,
|
| 61 |
+
default_limit=default_limit,
|
| 62 |
+
max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 63 |
)
|
| 64 |
+
applied_limit = int(limit_plan["applied_limit"])
|
| 65 |
page_lim = page_cap
|
| 66 |
pages_lim = ctx._clamp_int(
|
| 67 |
requested_max_pages, default=pages_cap, minimum=1, maximum=pages_cap
|
|
|
|
| 85 |
pages = 0
|
| 86 |
exhausted_feed = False
|
| 87 |
stopped_for_budget = False
|
| 88 |
+
try:
|
| 89 |
+
normalized_where = ctx._normalize_where(
|
| 90 |
+
where, allowed_fields=ACTIVITY_CANONICAL_FIELDS
|
| 91 |
+
)
|
| 92 |
+
except ValueError as exc:
|
| 93 |
+
return ctx._helper_error(
|
| 94 |
+
start_calls=start_calls,
|
| 95 |
+
source="/api/recent-activity",
|
| 96 |
+
error=exc,
|
| 97 |
+
)
|
| 98 |
+
while pages < pages_lim and (applied_limit == 0 or len(items) < applied_limit):
|
| 99 |
if ctx._budget_remaining() <= 0:
|
| 100 |
stopped_for_budget = True
|
| 101 |
break
|
|
|
|
| 156 |
if not ctx._item_matches_where(item, normalized_where):
|
| 157 |
continue
|
| 158 |
matched += 1
|
| 159 |
+
if len(items) < applied_limit:
|
| 160 |
items.append(item)
|
| 161 |
if not next_cursor:
|
| 162 |
exhausted_feed = True
|
| 163 |
break
|
| 164 |
+
try:
|
| 165 |
+
items = ctx._project_activity_items(items, fields)
|
| 166 |
+
except ValueError as exc:
|
| 167 |
+
return ctx._helper_error(
|
| 168 |
+
start_calls=start_calls,
|
| 169 |
+
source="/api/recent-activity",
|
| 170 |
+
error=exc,
|
| 171 |
+
)
|
| 172 |
exact_count = exhausted_feed and (not stopped_for_budget)
|
| 173 |
sample_complete = (
|
| 174 |
+
exact_count and applied_limit >= matched and (not count_only or matched == 0)
|
| 175 |
)
|
| 176 |
page_limit_hit = (
|
| 177 |
next_cursor is not None and pages >= pages_lim and (not exhausted_feed)
|
monty_api/helpers/profiles.py
CHANGED
|
@@ -5,11 +5,8 @@ from itertools import islice
|
|
| 5 |
import re
|
| 6 |
from typing import Any, Callable
|
| 7 |
from ..context_types import HelperRuntimeContext
|
| 8 |
-
from ..aliases import (
|
| 9 |
-
ACTOR_FIELD_ALIASES,
|
| 10 |
-
USER_FIELD_ALIASES,
|
| 11 |
-
)
|
| 12 |
from ..constants import (
|
|
|
|
| 13 |
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 14 |
GRAPH_SCAN_LIMIT_CAP,
|
| 15 |
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
|
@@ -74,7 +71,7 @@ async def hf_whoami(ctx: HelperRuntimeContext) -> dict[str, Any]:
|
|
| 74 |
item = {
|
| 75 |
"username": username,
|
| 76 |
"fullname": payload.get("fullname"),
|
| 77 |
-
"
|
| 78 |
}
|
| 79 |
items = [item] if isinstance(username, str) and username else []
|
| 80 |
return ctx._helper_success(
|
|
@@ -148,16 +145,16 @@ async def _hf_user_overview(ctx: HelperRuntimeContext, username: str) -> dict[st
|
|
| 148 |
"username": obj.username or u,
|
| 149 |
"fullname": obj.fullname,
|
| 150 |
"bio": getattr(obj, "details", None),
|
| 151 |
-
"
|
| 152 |
-
"
|
| 153 |
"twitter": _social_url("twitter", twitter_handle),
|
| 154 |
"github": _social_url("github", github_handle),
|
| 155 |
"linkedin": _social_url("linkedin", linkedin_handle),
|
| 156 |
"bluesky": _social_url("bluesky", bluesky_handle),
|
| 157 |
-
"
|
| 158 |
-
"
|
| 159 |
-
"
|
| 160 |
-
"
|
| 161 |
"followers": ctx._as_int(obj.num_followers),
|
| 162 |
"following": ctx._as_int(obj.num_following),
|
| 163 |
"likes": ctx._as_int(obj.num_likes),
|
|
@@ -168,7 +165,7 @@ async def _hf_user_overview(ctx: HelperRuntimeContext, username: str) -> dict[st
|
|
| 168 |
"papers": ctx._as_int(getattr(obj, "num_papers", None)),
|
| 169 |
"upvotes": ctx._as_int(getattr(obj, "num_upvotes", None)),
|
| 170 |
"orgs": org_names,
|
| 171 |
-
"
|
| 172 |
}
|
| 173 |
return ctx._helper_success(
|
| 174 |
start_calls=start_calls,
|
|
@@ -202,10 +199,10 @@ async def _hf_org_overview(
|
|
| 202 |
return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
|
| 203 |
item = {
|
| 204 |
"organization": obj.name or org,
|
| 205 |
-
"
|
| 206 |
-
"
|
| 207 |
"description": obj.details,
|
| 208 |
-
"
|
| 209 |
"followers": ctx._as_int(obj.num_followers),
|
| 210 |
"members": ctx._as_int(obj.num_users),
|
| 211 |
"models": ctx._as_int(getattr(obj, "num_models", None)),
|
|
@@ -226,7 +223,7 @@ async def _hf_org_overview(
|
|
| 226 |
async def hf_org_members(
|
| 227 |
ctx: HelperRuntimeContext,
|
| 228 |
organization: str,
|
| 229 |
-
|
| 230 |
scan_limit: int | None = None,
|
| 231 |
count_only: bool = False,
|
| 232 |
where: dict[str, Any] | None = None,
|
|
@@ -240,17 +237,17 @@ async def hf_org_members(
|
|
| 240 |
source="/api/organizations/<o>/members",
|
| 241 |
error="organization is required",
|
| 242 |
)
|
| 243 |
-
|
| 244 |
scan_cap = ctx._policy_int("hf_org_members", "scan_max", GRAPH_SCAN_LIMIT_CAP)
|
| 245 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 246 |
-
|
| 247 |
count_only=count_only,
|
| 248 |
-
|
| 249 |
-
|
| 250 |
scan_limit=scan_limit,
|
| 251 |
scan_cap=scan_cap,
|
| 252 |
)
|
| 253 |
-
|
| 254 |
scan_lim = int(limit_plan["applied_scan_limit"])
|
| 255 |
has_where = isinstance(where, dict) and bool(where)
|
| 256 |
overview_total: int | None = None
|
|
@@ -299,11 +296,21 @@ async def hf_org_members(
|
|
| 299 |
item = {
|
| 300 |
"username": handle,
|
| 301 |
"fullname": getattr(row, "fullname", None),
|
| 302 |
-
"
|
| 303 |
"role": getattr(row, "role", None),
|
| 304 |
}
|
| 305 |
normalized.append(item)
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
observed_total = len(rows)
|
| 308 |
scan_exhaustive = observed_total < scan_lim
|
| 309 |
overview_list_mismatch = (
|
|
@@ -324,14 +331,14 @@ async def hf_org_members(
|
|
| 324 |
total = observed_total
|
| 325 |
total_matched = observed_total
|
| 326 |
total_available = overview_total if overview_total is not None else observed_total
|
| 327 |
-
items = normalized[:
|
| 328 |
scan_limit_hit = not exact_count and observed_total >= scan_lim
|
| 329 |
count_source = (
|
| 330 |
"overview" if overview_total is not None and (not has_where) else "scan"
|
| 331 |
)
|
| 332 |
sample_complete = (
|
| 333 |
exact_count
|
| 334 |
-
and len(normalized) <=
|
| 335 |
and (not count_only or len(normalized) == 0)
|
| 336 |
)
|
| 337 |
more_available = ctx._derive_more_available(
|
|
@@ -342,7 +349,15 @@ async def hf_org_members(
|
|
| 342 |
)
|
| 343 |
if not exact_count and scan_limit_hit:
|
| 344 |
more_available = "unknown" if has_where else True
|
| 345 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
meta = ctx._build_exhaustive_result_meta(
|
| 347 |
base_meta={
|
| 348 |
"scanned": observed_total,
|
|
@@ -375,7 +390,7 @@ async def _user_graph_helper(
|
|
| 375 |
kind: str,
|
| 376 |
username: str,
|
| 377 |
pro_only: bool | None,
|
| 378 |
-
|
| 379 |
scan_limit: int | None,
|
| 380 |
count_only: bool,
|
| 381 |
where: dict[str, Any] | None,
|
|
@@ -384,10 +399,10 @@ async def _user_graph_helper(
|
|
| 384 |
helper_name: str,
|
| 385 |
) -> dict[str, Any]:
|
| 386 |
start_calls = ctx.call_count["n"]
|
| 387 |
-
|
| 388 |
scan_cap = ctx._policy_int(helper_name, "scan_max", GRAPH_SCAN_LIMIT_CAP)
|
| 389 |
-
|
| 390 |
-
helper_name, "
|
| 391 |
)
|
| 392 |
u = str(username or "").strip()
|
| 393 |
if not u:
|
|
@@ -397,14 +412,14 @@ async def _user_graph_helper(
|
|
| 397 |
error="username is required",
|
| 398 |
)
|
| 399 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 400 |
-
|
| 401 |
count_only=count_only,
|
| 402 |
-
|
| 403 |
-
|
| 404 |
scan_limit=scan_limit,
|
| 405 |
scan_cap=scan_cap,
|
| 406 |
)
|
| 407 |
-
|
| 408 |
scan_lim = int(limit_plan["applied_scan_limit"])
|
| 409 |
has_where = isinstance(where, dict) and bool(where)
|
| 410 |
filtered = pro_only is not None or has_where
|
|
@@ -509,14 +524,28 @@ async def _user_graph_helper(
|
|
| 509 |
item = {
|
| 510 |
"username": handle,
|
| 511 |
"fullname": getattr(row, "fullname", None),
|
| 512 |
-
"
|
| 513 |
}
|
| 514 |
-
if pro_only is True and item.get("
|
| 515 |
continue
|
| 516 |
-
if pro_only is False and item.get("
|
| 517 |
continue
|
| 518 |
normalized.append(item)
|
| 519 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 520 |
observed_total = len(rows)
|
| 521 |
scan_exhaustive = observed_total < scan_lim
|
| 522 |
overview_list_mismatch = (
|
|
@@ -537,14 +566,14 @@ async def _user_graph_helper(
|
|
| 537 |
total = observed_total
|
| 538 |
total_matched = observed_total
|
| 539 |
total_available = overview_total if overview_total is not None else observed_total
|
| 540 |
-
items = normalized[:
|
| 541 |
scan_limit_hit = not exact_count and observed_total >= scan_lim
|
| 542 |
count_source = (
|
| 543 |
"overview" if overview_total is not None and (not filtered) else "scan"
|
| 544 |
)
|
| 545 |
sample_complete = (
|
| 546 |
exact_count
|
| 547 |
-
and len(normalized) <=
|
| 548 |
and (not count_only or len(normalized) == 0)
|
| 549 |
)
|
| 550 |
more_available = ctx._derive_more_available(
|
|
@@ -555,7 +584,19 @@ async def _user_graph_helper(
|
|
| 555 |
)
|
| 556 |
if not exact_count and scan_limit_hit:
|
| 557 |
more_available = "unknown" if filtered else True
|
| 558 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
meta = ctx._build_exhaustive_result_meta(
|
| 560 |
base_meta={
|
| 561 |
"scanned": observed_total,
|
|
@@ -645,8 +686,8 @@ async def hf_profile_summary(
|
|
| 645 |
"display_name": overview_item.get("fullname")
|
| 646 |
or str(overview_item.get("username") or resolved_handle),
|
| 647 |
"bio": overview_item.get("bio"),
|
| 648 |
-
"avatar_url": overview_item.get("
|
| 649 |
-
"website_url": overview_item.get("
|
| 650 |
"twitter_url": overview_item.get("twitter"),
|
| 651 |
"github_url": overview_item.get("github"),
|
| 652 |
"linkedin_url": overview_item.get("linkedin"),
|
|
@@ -661,13 +702,13 @@ async def hf_profile_summary(
|
|
| 661 |
"papers_count": ctx._overview_count(overview_item, "papers"),
|
| 662 |
"upvotes_count": ctx._overview_count(overview_item, "upvotes"),
|
| 663 |
"organizations": overview_item.get("orgs"),
|
| 664 |
-
"is_pro": overview_item.get("
|
| 665 |
}
|
| 666 |
if "likes" in requested_sections:
|
| 667 |
likes = await ctx.call_helper(
|
| 668 |
"hf_user_likes",
|
| 669 |
username=resolved_handle,
|
| 670 |
-
|
| 671 |
scan_limit=USER_SUMMARY_LIKES_SCAN_LIMIT,
|
| 672 |
count_only=likes_lim == 0,
|
| 673 |
sort="liked_at",
|
|
@@ -689,7 +730,7 @@ async def hf_profile_summary(
|
|
| 689 |
"hf_recent_activity",
|
| 690 |
feed_type="user",
|
| 691 |
entity=resolved_handle,
|
| 692 |
-
|
| 693 |
max_pages=USER_SUMMARY_ACTIVITY_MAX_PAGES,
|
| 694 |
count_only=activity_lim == 0,
|
| 695 |
fields=["timestamp", "event_type", "repo_type", "repo_id"],
|
|
@@ -724,11 +765,11 @@ async def hf_profile_summary(
|
|
| 724 |
item = {
|
| 725 |
"handle": str(overview_item.get("organization") or resolved_handle),
|
| 726 |
"entity_type": "organization",
|
| 727 |
-
"display_name": overview_item.get("
|
| 728 |
or str(overview_item.get("organization") or resolved_handle),
|
| 729 |
"description": overview_item.get("description"),
|
| 730 |
-
"avatar_url": overview_item.get("
|
| 731 |
-
"website_url": overview_item.get("
|
| 732 |
"followers_count": ctx._overview_count(overview_item, "followers"),
|
| 733 |
"members_count": ctx._overview_count(overview_item, "members"),
|
| 734 |
"models_count": ctx._overview_count(overview_item, "models"),
|
|
@@ -765,7 +806,7 @@ async def hf_user_graph(
|
|
| 765 |
ctx: HelperRuntimeContext,
|
| 766 |
username: str | None = None,
|
| 767 |
relation: str = "followers",
|
| 768 |
-
|
| 769 |
scan_limit: int | None = None,
|
| 770 |
count_only: bool = False,
|
| 771 |
pro_only: bool | None = None,
|
|
@@ -800,7 +841,7 @@ async def hf_user_graph(
|
|
| 800 |
rel,
|
| 801 |
resolved_username,
|
| 802 |
pro_only,
|
| 803 |
-
|
| 804 |
scan_limit,
|
| 805 |
count_only,
|
| 806 |
where,
|
|
|
|
| 5 |
import re
|
| 6 |
from typing import Any, Callable
|
| 7 |
from ..context_types import HelperRuntimeContext
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from ..constants import (
|
| 9 |
+
ACTOR_CANONICAL_FIELDS,
|
| 10 |
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 11 |
GRAPH_SCAN_LIMIT_CAP,
|
| 12 |
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
|
|
|
| 71 |
item = {
|
| 72 |
"username": username,
|
| 73 |
"fullname": payload.get("fullname"),
|
| 74 |
+
"is_pro": payload.get("isPro"),
|
| 75 |
}
|
| 76 |
items = [item] if isinstance(username, str) and username else []
|
| 77 |
return ctx._helper_success(
|
|
|
|
| 145 |
"username": obj.username or u,
|
| 146 |
"fullname": obj.fullname,
|
| 147 |
"bio": getattr(obj, "details", None),
|
| 148 |
+
"avatar_url": obj.avatar_url,
|
| 149 |
+
"website_url": getattr(obj, "websiteUrl", None),
|
| 150 |
"twitter": _social_url("twitter", twitter_handle),
|
| 151 |
"github": _social_url("github", github_handle),
|
| 152 |
"linkedin": _social_url("linkedin", linkedin_handle),
|
| 153 |
"bluesky": _social_url("bluesky", bluesky_handle),
|
| 154 |
+
"twitter_handle": twitter_handle,
|
| 155 |
+
"github_handle": github_handle,
|
| 156 |
+
"linkedin_handle": linkedin_handle,
|
| 157 |
+
"bluesky_handle": bluesky_handle,
|
| 158 |
"followers": ctx._as_int(obj.num_followers),
|
| 159 |
"following": ctx._as_int(obj.num_following),
|
| 160 |
"likes": ctx._as_int(obj.num_likes),
|
|
|
|
| 165 |
"papers": ctx._as_int(getattr(obj, "num_papers", None)),
|
| 166 |
"upvotes": ctx._as_int(getattr(obj, "num_upvotes", None)),
|
| 167 |
"orgs": org_names,
|
| 168 |
+
"is_pro": obj.is_pro,
|
| 169 |
}
|
| 170 |
return ctx._helper_success(
|
| 171 |
start_calls=start_calls,
|
|
|
|
| 199 |
return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
|
| 200 |
item = {
|
| 201 |
"organization": obj.name or org,
|
| 202 |
+
"display_name": obj.fullname,
|
| 203 |
+
"avatar_url": obj.avatar_url,
|
| 204 |
"description": obj.details,
|
| 205 |
+
"website_url": getattr(obj, "websiteUrl", None),
|
| 206 |
"followers": ctx._as_int(obj.num_followers),
|
| 207 |
"members": ctx._as_int(obj.num_users),
|
| 208 |
"models": ctx._as_int(getattr(obj, "num_models", None)),
|
|
|
|
| 223 |
async def hf_org_members(
|
| 224 |
ctx: HelperRuntimeContext,
|
| 225 |
organization: str,
|
| 226 |
+
limit: int | None = None,
|
| 227 |
scan_limit: int | None = None,
|
| 228 |
count_only: bool = False,
|
| 229 |
where: dict[str, Any] | None = None,
|
|
|
|
| 237 |
source="/api/organizations/<o>/members",
|
| 238 |
error="organization is required",
|
| 239 |
)
|
| 240 |
+
default_limit = ctx._policy_int("hf_org_members", "default_limit", 100)
|
| 241 |
scan_cap = ctx._policy_int("hf_org_members", "scan_max", GRAPH_SCAN_LIMIT_CAP)
|
| 242 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 243 |
+
limit=limit,
|
| 244 |
count_only=count_only,
|
| 245 |
+
default_limit=default_limit,
|
| 246 |
+
max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 247 |
scan_limit=scan_limit,
|
| 248 |
scan_cap=scan_cap,
|
| 249 |
)
|
| 250 |
+
applied_limit = int(limit_plan["applied_limit"])
|
| 251 |
scan_lim = int(limit_plan["applied_scan_limit"])
|
| 252 |
has_where = isinstance(where, dict) and bool(where)
|
| 253 |
overview_total: int | None = None
|
|
|
|
| 296 |
item = {
|
| 297 |
"username": handle,
|
| 298 |
"fullname": getattr(row, "fullname", None),
|
| 299 |
+
"is_pro": getattr(row, "is_pro", None),
|
| 300 |
"role": getattr(row, "role", None),
|
| 301 |
}
|
| 302 |
normalized.append(item)
|
| 303 |
+
try:
|
| 304 |
+
normalized = ctx._apply_where(
|
| 305 |
+
normalized, where, allowed_fields=ACTOR_CANONICAL_FIELDS
|
| 306 |
+
)
|
| 307 |
+
except ValueError as exc:
|
| 308 |
+
return ctx._helper_error(
|
| 309 |
+
start_calls=start_calls,
|
| 310 |
+
source=endpoint,
|
| 311 |
+
error=exc,
|
| 312 |
+
organization=org,
|
| 313 |
+
)
|
| 314 |
observed_total = len(rows)
|
| 315 |
scan_exhaustive = observed_total < scan_lim
|
| 316 |
overview_list_mismatch = (
|
|
|
|
| 331 |
total = observed_total
|
| 332 |
total_matched = observed_total
|
| 333 |
total_available = overview_total if overview_total is not None else observed_total
|
| 334 |
+
items = normalized[:applied_limit]
|
| 335 |
scan_limit_hit = not exact_count and observed_total >= scan_lim
|
| 336 |
count_source = (
|
| 337 |
"overview" if overview_total is not None and (not has_where) else "scan"
|
| 338 |
)
|
| 339 |
sample_complete = (
|
| 340 |
exact_count
|
| 341 |
+
and len(normalized) <= applied_limit
|
| 342 |
and (not count_only or len(normalized) == 0)
|
| 343 |
)
|
| 344 |
more_available = ctx._derive_more_available(
|
|
|
|
| 349 |
)
|
| 350 |
if not exact_count and scan_limit_hit:
|
| 351 |
more_available = "unknown" if has_where else True
|
| 352 |
+
try:
|
| 353 |
+
items = ctx._project_actor_items(items, fields)
|
| 354 |
+
except ValueError as exc:
|
| 355 |
+
return ctx._helper_error(
|
| 356 |
+
start_calls=start_calls,
|
| 357 |
+
source=endpoint,
|
| 358 |
+
error=exc,
|
| 359 |
+
organization=org,
|
| 360 |
+
)
|
| 361 |
meta = ctx._build_exhaustive_result_meta(
|
| 362 |
base_meta={
|
| 363 |
"scanned": observed_total,
|
|
|
|
| 390 |
kind: str,
|
| 391 |
username: str,
|
| 392 |
pro_only: bool | None,
|
| 393 |
+
limit: int | None,
|
| 394 |
scan_limit: int | None,
|
| 395 |
count_only: bool,
|
| 396 |
where: dict[str, Any] | None,
|
|
|
|
| 399 |
helper_name: str,
|
| 400 |
) -> dict[str, Any]:
|
| 401 |
start_calls = ctx.call_count["n"]
|
| 402 |
+
default_limit = ctx._policy_int(helper_name, "default_limit", 100)
|
| 403 |
scan_cap = ctx._policy_int(helper_name, "scan_max", GRAPH_SCAN_LIMIT_CAP)
|
| 404 |
+
max_limit = ctx._policy_int(
|
| 405 |
+
helper_name, "max_limit", EXHAUSTIVE_HELPER_RETURN_HARD_CAP
|
| 406 |
)
|
| 407 |
u = str(username or "").strip()
|
| 408 |
if not u:
|
|
|
|
| 412 |
error="username is required",
|
| 413 |
)
|
| 414 |
limit_plan = ctx._resolve_exhaustive_limits(
|
| 415 |
+
limit=limit,
|
| 416 |
count_only=count_only,
|
| 417 |
+
default_limit=default_limit,
|
| 418 |
+
max_limit=max_limit,
|
| 419 |
scan_limit=scan_limit,
|
| 420 |
scan_cap=scan_cap,
|
| 421 |
)
|
| 422 |
+
applied_limit = int(limit_plan["applied_limit"])
|
| 423 |
scan_lim = int(limit_plan["applied_scan_limit"])
|
| 424 |
has_where = isinstance(where, dict) and bool(where)
|
| 425 |
filtered = pro_only is not None or has_where
|
|
|
|
| 524 |
item = {
|
| 525 |
"username": handle,
|
| 526 |
"fullname": getattr(row, "fullname", None),
|
| 527 |
+
"is_pro": getattr(row, "is_pro", None),
|
| 528 |
}
|
| 529 |
+
if pro_only is True and item.get("is_pro") is not True:
|
| 530 |
continue
|
| 531 |
+
if pro_only is False and item.get("is_pro") is True:
|
| 532 |
continue
|
| 533 |
normalized.append(item)
|
| 534 |
+
try:
|
| 535 |
+
normalized = ctx._apply_where(
|
| 536 |
+
normalized, where, allowed_fields=ACTOR_CANONICAL_FIELDS
|
| 537 |
+
)
|
| 538 |
+
except ValueError as exc:
|
| 539 |
+
return ctx._helper_error(
|
| 540 |
+
start_calls=start_calls,
|
| 541 |
+
source=endpoint,
|
| 542 |
+
error=exc,
|
| 543 |
+
relation=kind,
|
| 544 |
+
username=u,
|
| 545 |
+
entity=u,
|
| 546 |
+
entity_type=entity_type,
|
| 547 |
+
organization=u if entity_type == "organization" else None,
|
| 548 |
+
)
|
| 549 |
observed_total = len(rows)
|
| 550 |
scan_exhaustive = observed_total < scan_lim
|
| 551 |
overview_list_mismatch = (
|
|
|
|
| 566 |
total = observed_total
|
| 567 |
total_matched = observed_total
|
| 568 |
total_available = overview_total if overview_total is not None else observed_total
|
| 569 |
+
items = normalized[:applied_limit]
|
| 570 |
scan_limit_hit = not exact_count and observed_total >= scan_lim
|
| 571 |
count_source = (
|
| 572 |
"overview" if overview_total is not None and (not filtered) else "scan"
|
| 573 |
)
|
| 574 |
sample_complete = (
|
| 575 |
exact_count
|
| 576 |
+
and len(normalized) <= applied_limit
|
| 577 |
and (not count_only or len(normalized) == 0)
|
| 578 |
)
|
| 579 |
more_available = ctx._derive_more_available(
|
|
|
|
| 584 |
)
|
| 585 |
if not exact_count and scan_limit_hit:
|
| 586 |
more_available = "unknown" if filtered else True
|
| 587 |
+
try:
|
| 588 |
+
items = ctx._project_actor_items(items, fields)
|
| 589 |
+
except ValueError as exc:
|
| 590 |
+
return ctx._helper_error(
|
| 591 |
+
start_calls=start_calls,
|
| 592 |
+
source=endpoint,
|
| 593 |
+
error=exc,
|
| 594 |
+
relation=kind,
|
| 595 |
+
username=u,
|
| 596 |
+
entity=u,
|
| 597 |
+
entity_type=entity_type,
|
| 598 |
+
organization=u if entity_type == "organization" else None,
|
| 599 |
+
)
|
| 600 |
meta = ctx._build_exhaustive_result_meta(
|
| 601 |
base_meta={
|
| 602 |
"scanned": observed_total,
|
|
|
|
| 686 |
"display_name": overview_item.get("fullname")
|
| 687 |
or str(overview_item.get("username") or resolved_handle),
|
| 688 |
"bio": overview_item.get("bio"),
|
| 689 |
+
"avatar_url": overview_item.get("avatar_url"),
|
| 690 |
+
"website_url": overview_item.get("website_url"),
|
| 691 |
"twitter_url": overview_item.get("twitter"),
|
| 692 |
"github_url": overview_item.get("github"),
|
| 693 |
"linkedin_url": overview_item.get("linkedin"),
|
|
|
|
| 702 |
"papers_count": ctx._overview_count(overview_item, "papers"),
|
| 703 |
"upvotes_count": ctx._overview_count(overview_item, "upvotes"),
|
| 704 |
"organizations": overview_item.get("orgs"),
|
| 705 |
+
"is_pro": overview_item.get("is_pro"),
|
| 706 |
}
|
| 707 |
if "likes" in requested_sections:
|
| 708 |
likes = await ctx.call_helper(
|
| 709 |
"hf_user_likes",
|
| 710 |
username=resolved_handle,
|
| 711 |
+
limit=likes_lim,
|
| 712 |
scan_limit=USER_SUMMARY_LIKES_SCAN_LIMIT,
|
| 713 |
count_only=likes_lim == 0,
|
| 714 |
sort="liked_at",
|
|
|
|
| 730 |
"hf_recent_activity",
|
| 731 |
feed_type="user",
|
| 732 |
entity=resolved_handle,
|
| 733 |
+
limit=activity_lim,
|
| 734 |
max_pages=USER_SUMMARY_ACTIVITY_MAX_PAGES,
|
| 735 |
count_only=activity_lim == 0,
|
| 736 |
fields=["timestamp", "event_type", "repo_type", "repo_id"],
|
|
|
|
| 765 |
item = {
|
| 766 |
"handle": str(overview_item.get("organization") or resolved_handle),
|
| 767 |
"entity_type": "organization",
|
| 768 |
+
"display_name": overview_item.get("display_name")
|
| 769 |
or str(overview_item.get("organization") or resolved_handle),
|
| 770 |
"description": overview_item.get("description"),
|
| 771 |
+
"avatar_url": overview_item.get("avatar_url"),
|
| 772 |
+
"website_url": overview_item.get("website_url"),
|
| 773 |
"followers_count": ctx._overview_count(overview_item, "followers"),
|
| 774 |
"members_count": ctx._overview_count(overview_item, "members"),
|
| 775 |
"models_count": ctx._overview_count(overview_item, "models"),
|
|
|
|
| 806 |
ctx: HelperRuntimeContext,
|
| 807 |
username: str | None = None,
|
| 808 |
relation: str = "followers",
|
| 809 |
+
limit: int | None = None,
|
| 810 |
scan_limit: int | None = None,
|
| 811 |
count_only: bool = False,
|
| 812 |
pro_only: bool | None = None,
|
|
|
|
| 841 |
rel,
|
| 842 |
resolved_username,
|
| 843 |
pro_only,
|
| 844 |
+
limit,
|
| 845 |
scan_limit,
|
| 846 |
count_only,
|
| 847 |
where,
|
monty_api/http_runtime.py
CHANGED
|
@@ -9,11 +9,11 @@ from urllib.request import Request, urlopen
|
|
| 9 |
|
| 10 |
from huggingface_hub import HfApi
|
| 11 |
|
| 12 |
-
from .aliases import REPO_SORT_KEYS
|
| 13 |
from .constants import (
|
| 14 |
DEFAULT_TIMEOUT_SEC,
|
| 15 |
)
|
| 16 |
-
from .registry import REPO_API_ADAPTERS
|
| 17 |
from .validation import _endpoint_allowed, _normalize_endpoint, _sanitize_params
|
| 18 |
|
| 19 |
|
|
@@ -78,10 +78,14 @@ def _normalize_repo_sort_key(
|
|
| 78 |
if not raw:
|
| 79 |
return None, None
|
| 80 |
|
| 81 |
-
key =
|
| 82 |
-
if key
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
return None, f"Invalid sort key '{raw}'"
|
| 86 |
|
| 87 |
rt = _canonical_repo_type(repo_type)
|
|
@@ -111,6 +115,8 @@ def _repo_list_call(api: HfApi, repo_type: str, **kwargs: Any) -> list[Any]:
|
|
| 111 |
def _repo_detail_call(api: HfApi, repo_type: str, repo_id: str) -> Any:
|
| 112 |
adapter = _repo_api_adapter(repo_type)
|
| 113 |
method = getattr(api, adapter.detail_method_name)
|
|
|
|
|
|
|
| 114 |
return method(repo_id)
|
| 115 |
|
| 116 |
|
|
@@ -138,6 +144,43 @@ def _optional_str_list(value: Any) -> list[str] | None:
|
|
| 138 |
return None
|
| 139 |
|
| 140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
def _extract_num_params(num_params: Any = None, safetensors: Any = None) -> int | None:
|
| 142 |
direct = _as_int(num_params)
|
| 143 |
if direct is not None:
|
|
@@ -149,6 +192,24 @@ def _extract_num_params(num_params: Any = None, safetensors: Any = None) -> int
|
|
| 149 |
return _as_int(total)
|
| 150 |
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
def _extract_author_names(value: Any) -> list[str] | None:
|
| 153 |
if not isinstance(value, (list, tuple)):
|
| 154 |
return None
|
|
@@ -242,6 +303,8 @@ def _build_repo_row(
|
|
| 242 |
models: Any = None,
|
| 243 |
datasets: Any = None,
|
| 244 |
subdomain: Any = None,
|
|
|
|
|
|
|
| 245 |
) -> dict[str, Any]:
|
| 246 |
rt = _canonical_repo_type(repo_type)
|
| 247 |
author_value = author
|
|
@@ -252,6 +315,15 @@ def _build_repo_row(
|
|
| 252 |
):
|
| 253 |
author_value = repo_id.split("/", 1)[0]
|
| 254 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
return {
|
| 256 |
"id": repo_id,
|
| 257 |
"slug": repo_id,
|
|
@@ -279,6 +351,8 @@ def _build_repo_row(
|
|
| 279 |
"models": _optional_str_list(models),
|
| 280 |
"datasets": _optional_str_list(datasets),
|
| 281 |
"subdomain": subdomain,
|
|
|
|
|
|
|
| 282 |
}
|
| 283 |
|
| 284 |
|
|
@@ -292,9 +366,7 @@ def _normalize_repo_search_row(row: Any, repo_type: str) -> dict[str, Any]:
|
|
| 292 |
created_at=getattr(row, "created_at", None),
|
| 293 |
last_modified=getattr(row, "last_modified", None),
|
| 294 |
pipeline_tag=getattr(row, "pipeline_tag", None),
|
| 295 |
-
num_params=
|
| 296 |
-
getattr(row, "num_params", None), getattr(row, "safetensors", None)
|
| 297 |
-
),
|
| 298 |
private=getattr(row, "private", None),
|
| 299 |
trending_score=getattr(row, "trending_score", None),
|
| 300 |
tags=getattr(row, "tags", None),
|
|
@@ -307,6 +379,7 @@ def _normalize_repo_search_row(row: Any, repo_type: str) -> dict[str, Any]:
|
|
| 307 |
models=getattr(row, "models", None),
|
| 308 |
datasets=getattr(row, "datasets", None),
|
| 309 |
subdomain=getattr(row, "subdomain", None),
|
|
|
|
| 310 |
)
|
| 311 |
|
| 312 |
|
|
@@ -325,11 +398,6 @@ def _normalize_repo_detail_row(
|
|
| 325 |
def _normalize_trending_row(
|
| 326 |
repo: dict[str, Any], default_repo_type: str, rank: int | None = None
|
| 327 |
) -> dict[str, Any]:
|
| 328 |
-
raw_num_params = (
|
| 329 |
-
repo.get("num_params")
|
| 330 |
-
if repo.get("num_params") is not None
|
| 331 |
-
else repo.get("numParameters")
|
| 332 |
-
)
|
| 333 |
row = _build_repo_row(
|
| 334 |
repo_id=repo.get("id"),
|
| 335 |
repo_type=repo.get("type") or repo.get("repoType") or default_repo_type,
|
|
@@ -339,7 +407,7 @@ def _normalize_trending_row(
|
|
| 339 |
created_at=repo.get("createdAt"),
|
| 340 |
last_modified=repo.get("lastModified"),
|
| 341 |
pipeline_tag=repo.get("pipeline_tag"),
|
| 342 |
-
num_params=
|
| 343 |
private=repo.get("private"),
|
| 344 |
trending_score=repo.get("trendingScore"),
|
| 345 |
tags=repo.get("tags"),
|
|
@@ -352,6 +420,8 @@ def _normalize_trending_row(
|
|
| 352 |
models=repo.get("models"),
|
| 353 |
datasets=repo.get("datasets"),
|
| 354 |
subdomain=repo.get("subdomain"),
|
|
|
|
|
|
|
| 355 |
)
|
| 356 |
if rank is not None:
|
| 357 |
row["trending_rank"] = rank
|
|
@@ -419,7 +489,7 @@ def _normalize_collection_repo_item(row: dict[str, Any]) -> dict[str, Any] | Non
|
|
| 419 |
created_at=row.get("createdAt") or row.get("created_at"),
|
| 420 |
last_modified=row.get("lastModified") or row.get("last_modified"),
|
| 421 |
pipeline_tag=row.get("pipeline_tag") or row.get("pipelineTag"),
|
| 422 |
-
num_params=
|
| 423 |
private=row.get("private"),
|
| 424 |
tags=row.get("tags"),
|
| 425 |
gated=row.get("gated"),
|
|
@@ -430,6 +500,8 @@ def _normalize_collection_repo_item(row: dict[str, Any]) -> dict[str, Any] | Non
|
|
| 430 |
models=row.get("models"),
|
| 431 |
datasets=row.get("datasets"),
|
| 432 |
subdomain=row.get("subdomain"),
|
|
|
|
|
|
|
| 433 |
)
|
| 434 |
|
| 435 |
|
|
|
|
| 9 |
|
| 10 |
from huggingface_hub import HfApi
|
| 11 |
|
| 12 |
+
from .aliases import REPO_SORT_KEYS
|
| 13 |
from .constants import (
|
| 14 |
DEFAULT_TIMEOUT_SEC,
|
| 15 |
)
|
| 16 |
+
from .registry import REPO_API_ADAPTERS, REPO_SEARCH_DEFAULT_EXPAND
|
| 17 |
from .validation import _endpoint_allowed, _normalize_endpoint, _sanitize_params
|
| 18 |
|
| 19 |
|
|
|
|
| 78 |
if not raw:
|
| 79 |
return None, None
|
| 80 |
|
| 81 |
+
key = raw
|
| 82 |
+
if key not in {
|
| 83 |
+
"created_at",
|
| 84 |
+
"downloads",
|
| 85 |
+
"last_modified",
|
| 86 |
+
"likes",
|
| 87 |
+
"trending_score",
|
| 88 |
+
}:
|
| 89 |
return None, f"Invalid sort key '{raw}'"
|
| 90 |
|
| 91 |
rt = _canonical_repo_type(repo_type)
|
|
|
|
| 115 |
def _repo_detail_call(api: HfApi, repo_type: str, repo_id: str) -> Any:
|
| 116 |
adapter = _repo_api_adapter(repo_type)
|
| 117 |
method = getattr(api, adapter.detail_method_name)
|
| 118 |
+
if _canonical_repo_type(repo_type) == "space":
|
| 119 |
+
return method(repo_id, expand=list(REPO_SEARCH_DEFAULT_EXPAND["space"]))
|
| 120 |
return method(repo_id)
|
| 121 |
|
| 122 |
|
|
|
|
| 144 |
return None
|
| 145 |
|
| 146 |
|
| 147 |
+
def _space_runtime_to_dict(value: Any) -> dict[str, Any] | None:
|
| 148 |
+
if value is None:
|
| 149 |
+
return None
|
| 150 |
+
|
| 151 |
+
if isinstance(value, dict):
|
| 152 |
+
raw = value
|
| 153 |
+
hardware = raw.get("hardware")
|
| 154 |
+
current_hardware = (
|
| 155 |
+
hardware.get("current") if isinstance(hardware, dict) else hardware
|
| 156 |
+
)
|
| 157 |
+
requested_hardware = (
|
| 158 |
+
hardware.get("requested")
|
| 159 |
+
if isinstance(hardware, dict)
|
| 160 |
+
else raw.get("requested_hardware") or raw.get("requestedHardware")
|
| 161 |
+
)
|
| 162 |
+
sleep_time = _as_int(
|
| 163 |
+
raw.get("gcTimeout")
|
| 164 |
+
if raw.get("gcTimeout") is not None
|
| 165 |
+
else raw.get("sleep_time") or raw.get("sleepTime")
|
| 166 |
+
)
|
| 167 |
+
out = {
|
| 168 |
+
"stage": raw.get("stage"),
|
| 169 |
+
"hardware": current_hardware,
|
| 170 |
+
"requested_hardware": requested_hardware,
|
| 171 |
+
"sleep_time": sleep_time,
|
| 172 |
+
}
|
| 173 |
+
return {key: val for key, val in out.items() if val is not None} or None
|
| 174 |
+
|
| 175 |
+
out = {
|
| 176 |
+
"stage": getattr(value, "stage", None),
|
| 177 |
+
"hardware": getattr(value, "hardware", None),
|
| 178 |
+
"requested_hardware": getattr(value, "requested_hardware", None),
|
| 179 |
+
"sleep_time": _as_int(getattr(value, "sleep_time", None)),
|
| 180 |
+
}
|
| 181 |
+
return {key: val for key, val in out.items() if val is not None} or None
|
| 182 |
+
|
| 183 |
+
|
| 184 |
def _extract_num_params(num_params: Any = None, safetensors: Any = None) -> int | None:
|
| 185 |
direct = _as_int(num_params)
|
| 186 |
if direct is not None:
|
|
|
|
| 192 |
return _as_int(total)
|
| 193 |
|
| 194 |
|
| 195 |
+
def _extract_num_params_from_object(row: Any) -> int | None:
|
| 196 |
+
raw_num_params = getattr(row, "num_params", None)
|
| 197 |
+
if raw_num_params is None:
|
| 198 |
+
raw_num_params = getattr(row, "numParameters", None)
|
| 199 |
+
if raw_num_params is None:
|
| 200 |
+
raw_num_params = getattr(row, "num_parameters", None)
|
| 201 |
+
return _extract_num_params(raw_num_params, getattr(row, "safetensors", None))
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def _extract_num_params_from_dict(row: dict[str, Any]) -> int | None:
|
| 205 |
+
raw_num_params = row.get("num_params")
|
| 206 |
+
if raw_num_params is None:
|
| 207 |
+
raw_num_params = row.get("numParameters")
|
| 208 |
+
if raw_num_params is None:
|
| 209 |
+
raw_num_params = row.get("num_parameters")
|
| 210 |
+
return _extract_num_params(raw_num_params, row.get("safetensors"))
|
| 211 |
+
|
| 212 |
+
|
| 213 |
def _extract_author_names(value: Any) -> list[str] | None:
|
| 214 |
if not isinstance(value, (list, tuple)):
|
| 215 |
return None
|
|
|
|
| 303 |
models: Any = None,
|
| 304 |
datasets: Any = None,
|
| 305 |
subdomain: Any = None,
|
| 306 |
+
runtime: Any = None,
|
| 307 |
+
runtime_stage: Any = None,
|
| 308 |
) -> dict[str, Any]:
|
| 309 |
rt = _canonical_repo_type(repo_type)
|
| 310 |
author_value = author
|
|
|
|
| 315 |
):
|
| 316 |
author_value = repo_id.split("/", 1)[0]
|
| 317 |
|
| 318 |
+
runtime_payload = _space_runtime_to_dict(runtime)
|
| 319 |
+
resolved_runtime_stage = (
|
| 320 |
+
runtime_stage
|
| 321 |
+
if runtime_stage is not None
|
| 322 |
+
else runtime_payload.get("stage")
|
| 323 |
+
if isinstance(runtime_payload, dict)
|
| 324 |
+
else None
|
| 325 |
+
)
|
| 326 |
+
|
| 327 |
return {
|
| 328 |
"id": repo_id,
|
| 329 |
"slug": repo_id,
|
|
|
|
| 351 |
"models": _optional_str_list(models),
|
| 352 |
"datasets": _optional_str_list(datasets),
|
| 353 |
"subdomain": subdomain,
|
| 354 |
+
"runtime_stage": resolved_runtime_stage,
|
| 355 |
+
"runtime": runtime_payload,
|
| 356 |
}
|
| 357 |
|
| 358 |
|
|
|
|
| 366 |
created_at=getattr(row, "created_at", None),
|
| 367 |
last_modified=getattr(row, "last_modified", None),
|
| 368 |
pipeline_tag=getattr(row, "pipeline_tag", None),
|
| 369 |
+
num_params=_extract_num_params_from_object(row),
|
|
|
|
|
|
|
| 370 |
private=getattr(row, "private", None),
|
| 371 |
trending_score=getattr(row, "trending_score", None),
|
| 372 |
tags=getattr(row, "tags", None),
|
|
|
|
| 379 |
models=getattr(row, "models", None),
|
| 380 |
datasets=getattr(row, "datasets", None),
|
| 381 |
subdomain=getattr(row, "subdomain", None),
|
| 382 |
+
runtime=getattr(row, "runtime", None),
|
| 383 |
)
|
| 384 |
|
| 385 |
|
|
|
|
| 398 |
def _normalize_trending_row(
|
| 399 |
repo: dict[str, Any], default_repo_type: str, rank: int | None = None
|
| 400 |
) -> dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
row = _build_repo_row(
|
| 402 |
repo_id=repo.get("id"),
|
| 403 |
repo_type=repo.get("type") or repo.get("repoType") or default_repo_type,
|
|
|
|
| 407 |
created_at=repo.get("createdAt"),
|
| 408 |
last_modified=repo.get("lastModified"),
|
| 409 |
pipeline_tag=repo.get("pipeline_tag"),
|
| 410 |
+
num_params=_extract_num_params_from_dict(repo),
|
| 411 |
private=repo.get("private"),
|
| 412 |
trending_score=repo.get("trendingScore"),
|
| 413 |
tags=repo.get("tags"),
|
|
|
|
| 420 |
models=repo.get("models"),
|
| 421 |
datasets=repo.get("datasets"),
|
| 422 |
subdomain=repo.get("subdomain"),
|
| 423 |
+
runtime=repo.get("runtime"),
|
| 424 |
+
runtime_stage=repo.get("runtime_stage") or repo.get("runtimeStage"),
|
| 425 |
)
|
| 426 |
if rank is not None:
|
| 427 |
row["trending_rank"] = rank
|
|
|
|
| 489 |
created_at=row.get("createdAt") or row.get("created_at"),
|
| 490 |
last_modified=row.get("lastModified") or row.get("last_modified"),
|
| 491 |
pipeline_tag=row.get("pipeline_tag") or row.get("pipelineTag"),
|
| 492 |
+
num_params=_extract_num_params_from_dict(row),
|
| 493 |
private=row.get("private"),
|
| 494 |
tags=row.get("tags"),
|
| 495 |
gated=row.get("gated"),
|
|
|
|
| 500 |
models=row.get("models"),
|
| 501 |
datasets=row.get("datasets"),
|
| 502 |
subdomain=row.get("subdomain"),
|
| 503 |
+
runtime=row.get("runtime"),
|
| 504 |
+
runtime_stage=row.get("runtime_stage") or row.get("runtimeStage"),
|
| 505 |
)
|
| 506 |
|
| 507 |
|
monty_api/query_entrypoints.py
CHANGED
|
@@ -4,6 +4,8 @@ import argparse
|
|
| 4 |
import asyncio
|
| 5 |
import inspect
|
| 6 |
import json
|
|
|
|
|
|
|
| 7 |
import time
|
| 8 |
from typing import Any, Callable
|
| 9 |
|
|
@@ -33,6 +35,25 @@ class MontyExecutionError(RuntimeError):
|
|
| 33 |
self.trace = trace
|
| 34 |
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
def _introspect_helper_signatures() -> dict[str, set[str]]:
|
| 37 |
env = build_runtime_helper_environment(
|
| 38 |
max_calls=DEFAULT_MAX_CALLS,
|
|
@@ -213,6 +234,12 @@ async def _execute_query(
|
|
| 213 |
timeout_sec=timeout_sec,
|
| 214 |
)
|
| 215 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
return await _run_with_monty(
|
| 217 |
code=prepared_code,
|
| 218 |
query=prepared_query,
|
|
|
|
| 4 |
import asyncio
|
| 5 |
import inspect
|
| 6 |
import json
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
import time
|
| 10 |
from typing import Any, Callable
|
| 11 |
|
|
|
|
| 35 |
self.trace = trace
|
| 36 |
|
| 37 |
|
| 38 |
+
def _query_debug_enabled() -> bool:
|
| 39 |
+
value = os.environ.get("MONTY_DEBUG_QUERY", "")
|
| 40 |
+
return value.strip().lower() in {"1", "true", "yes", "on"}
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def _log_generated_query(
|
| 44 |
+
*, query: str, code: str, max_calls: int | None, timeout_sec: int | None
|
| 45 |
+
) -> None:
|
| 46 |
+
if not _query_debug_enabled():
|
| 47 |
+
return
|
| 48 |
+
print("[monty-debug] query:", file=sys.stderr)
|
| 49 |
+
print(query, file=sys.stderr)
|
| 50 |
+
print("[monty-debug] max_calls:", max_calls, file=sys.stderr)
|
| 51 |
+
print("[monty-debug] timeout_sec:", timeout_sec, file=sys.stderr)
|
| 52 |
+
print("[monty-debug] code:", file=sys.stderr)
|
| 53 |
+
print(code, file=sys.stderr)
|
| 54 |
+
sys.stderr.flush()
|
| 55 |
+
|
| 56 |
+
|
| 57 |
def _introspect_helper_signatures() -> dict[str, set[str]]:
|
| 58 |
env = build_runtime_helper_environment(
|
| 59 |
max_calls=DEFAULT_MAX_CALLS,
|
|
|
|
| 234 |
timeout_sec=timeout_sec,
|
| 235 |
)
|
| 236 |
)
|
| 237 |
+
_log_generated_query(
|
| 238 |
+
query=prepared_query,
|
| 239 |
+
code=prepared_code,
|
| 240 |
+
max_calls=prepared_max_calls,
|
| 241 |
+
timeout_sec=prepared_timeout,
|
| 242 |
+
)
|
| 243 |
return await _run_with_monty(
|
| 244 |
code=prepared_code,
|
| 245 |
query=prepared_query,
|
monty_api/runtime_context.py
CHANGED
|
@@ -60,6 +60,8 @@ from .runtime_filtering import (
|
|
| 60 |
_project_activity_items,
|
| 61 |
_project_actor_items,
|
| 62 |
_project_collection_items,
|
|
|
|
|
|
|
| 63 |
_project_daily_paper_items,
|
| 64 |
_project_items,
|
| 65 |
_project_repo_items,
|
|
@@ -215,6 +217,8 @@ for name, value in {
|
|
| 215 |
"_project_items": _project_items,
|
| 216 |
"_project_repo_items": _project_repo_items,
|
| 217 |
"_project_collection_items": _project_collection_items,
|
|
|
|
|
|
|
| 218 |
"_project_daily_paper_items": _project_daily_paper_items,
|
| 219 |
"_project_user_items": _project_user_items,
|
| 220 |
"_project_actor_items": _project_actor_items,
|
|
|
|
| 60 |
_project_activity_items,
|
| 61 |
_project_actor_items,
|
| 62 |
_project_collection_items,
|
| 63 |
+
_project_discussion_detail_items,
|
| 64 |
+
_project_discussion_items,
|
| 65 |
_project_daily_paper_items,
|
| 66 |
_project_items,
|
| 67 |
_project_repo_items,
|
|
|
|
| 217 |
"_project_items": _project_items,
|
| 218 |
"_project_repo_items": _project_repo_items,
|
| 219 |
"_project_collection_items": _project_collection_items,
|
| 220 |
+
"_project_discussion_items": _project_discussion_items,
|
| 221 |
+
"_project_discussion_detail_items": _project_discussion_detail_items,
|
| 222 |
"_project_daily_paper_items": _project_daily_paper_items,
|
| 223 |
"_project_user_items": _project_user_items,
|
| 224 |
"_project_actor_items": _project_actor_items,
|
monty_api/runtime_envelopes.py
CHANGED
|
@@ -21,8 +21,8 @@ def _helper_meta(
|
|
| 21 |
def _derive_limit_metadata(
|
| 22 |
self: Any,
|
| 23 |
*,
|
| 24 |
-
|
| 25 |
-
|
| 26 |
default_limit_used: bool,
|
| 27 |
requested_scan_limit: int | None = None,
|
| 28 |
applied_scan_limit: int | None = None,
|
|
@@ -30,8 +30,8 @@ def _derive_limit_metadata(
|
|
| 30 |
applied_max_pages: int | None = None,
|
| 31 |
) -> dict[str, Any]:
|
| 32 |
meta: dict[str, Any] = {
|
| 33 |
-
"
|
| 34 |
-
"
|
| 35 |
"default_limit_used": default_limit_used,
|
| 36 |
}
|
| 37 |
if requested_scan_limit is not None or applied_scan_limit is not None:
|
|
@@ -42,8 +42,8 @@ def _derive_limit_metadata(
|
|
| 42 |
meta["requested_max_pages"] = requested_max_pages
|
| 43 |
meta["applied_max_pages"] = applied_max_pages
|
| 44 |
meta["page_limit_applied"] = requested_max_pages != applied_max_pages
|
| 45 |
-
if
|
| 46 |
-
meta["hard_cap_applied"] =
|
| 47 |
return meta
|
| 48 |
|
| 49 |
|
|
@@ -68,9 +68,9 @@ def _derive_truncated_by(
|
|
| 68 |
hard_cap: bool = False,
|
| 69 |
scan_limit_hit: bool = False,
|
| 70 |
page_limit_hit: bool = False,
|
| 71 |
-
|
| 72 |
) -> str:
|
| 73 |
-
causes = [hard_cap, scan_limit_hit, page_limit_hit,
|
| 74 |
if sum(1 for cause in causes if cause) > 1:
|
| 75 |
return "multiple"
|
| 76 |
if hard_cap:
|
|
@@ -79,8 +79,8 @@ def _derive_truncated_by(
|
|
| 79 |
return "scan_limit"
|
| 80 |
if page_limit_hit:
|
| 81 |
return "page_limit"
|
| 82 |
-
if
|
| 83 |
-
return "
|
| 84 |
return "none"
|
| 85 |
|
| 86 |
|
|
@@ -89,7 +89,7 @@ def _derive_can_request_more(
|
|
| 89 |
) -> bool:
|
| 90 |
if sample_complete:
|
| 91 |
return False
|
| 92 |
-
return truncated_by in {"
|
| 93 |
|
| 94 |
|
| 95 |
def _derive_next_request_hint(
|
|
@@ -97,12 +97,12 @@ def _derive_next_request_hint(
|
|
| 97 |
*,
|
| 98 |
truncated_by: str,
|
| 99 |
more_available: bool | str,
|
| 100 |
-
|
| 101 |
applied_scan_limit: int | None = None,
|
| 102 |
applied_max_pages: int | None = None,
|
| 103 |
) -> str:
|
| 104 |
-
if truncated_by == "
|
| 105 |
-
return f"Ask for
|
| 106 |
if truncated_by == "scan_limit" and applied_scan_limit is not None:
|
| 107 |
return f"Increase scan_limit above {applied_scan_limit} for broader coverage"
|
| 108 |
if truncated_by == "page_limit" and applied_max_pages is not None:
|
|
@@ -121,28 +121,27 @@ def _derive_next_request_hint(
|
|
| 121 |
def _resolve_exhaustive_limits(
|
| 122 |
self: Any,
|
| 123 |
*,
|
| 124 |
-
|
| 125 |
count_only: bool,
|
| 126 |
-
|
| 127 |
-
|
| 128 |
scan_limit: int | None = None,
|
| 129 |
scan_cap: int | None = None,
|
| 130 |
) -> dict[str, Any]:
|
| 131 |
-
|
| 132 |
-
|
| 133 |
out: dict[str, Any] = {
|
| 134 |
-
"
|
| 135 |
-
"
|
| 136 |
-
|
| 137 |
-
default=
|
| 138 |
minimum=0,
|
| 139 |
-
maximum=
|
| 140 |
),
|
| 141 |
-
"default_limit_used":
|
| 142 |
}
|
| 143 |
out["hard_cap_applied"] = (
|
| 144 |
-
|
| 145 |
-
and out["applied_return_limit"] < requested_return_limit
|
| 146 |
)
|
| 147 |
if scan_cap is not None:
|
| 148 |
out["requested_scan_limit"] = scan_limit
|
|
@@ -168,7 +167,7 @@ def _build_exhaustive_meta(
|
|
| 168 |
applied_max_pages: int | None = None,
|
| 169 |
) -> dict[str, Any]:
|
| 170 |
meta = dict(base_meta)
|
| 171 |
-
|
| 172 |
applied_scan_limit = limit_plan.get("applied_scan_limit")
|
| 173 |
meta.update(
|
| 174 |
{
|
|
@@ -186,7 +185,7 @@ def _build_exhaustive_meta(
|
|
| 186 |
self,
|
| 187 |
truncated_by=truncated_by,
|
| 188 |
more_available=more_available,
|
| 189 |
-
|
| 190 |
applied_scan_limit=applied_scan_limit
|
| 191 |
if isinstance(applied_scan_limit, int)
|
| 192 |
else None,
|
|
@@ -197,8 +196,8 @@ def _build_exhaustive_meta(
|
|
| 197 |
meta.update(
|
| 198 |
_derive_limit_metadata(
|
| 199 |
self,
|
| 200 |
-
|
| 201 |
-
|
| 202 |
default_limit_used=bool(limit_plan["default_limit_used"]),
|
| 203 |
requested_scan_limit=limit_plan.get("requested_scan_limit"),
|
| 204 |
applied_scan_limit=applied_scan_limit
|
|
@@ -263,26 +262,26 @@ def _build_exhaustive_result_meta(
|
|
| 263 |
requested_max_pages: int | None = None,
|
| 264 |
applied_max_pages: int | None = None,
|
| 265 |
) -> dict[str, Any]:
|
| 266 |
-
|
| 267 |
if count_only:
|
| 268 |
effective_sample_complete = exact_count
|
| 269 |
else:
|
| 270 |
effective_sample_complete = (
|
| 271 |
sample_complete
|
| 272 |
if isinstance(sample_complete, bool)
|
| 273 |
-
else exact_count and matched_count <=
|
| 274 |
)
|
| 275 |
-
|
| 276 |
False
|
| 277 |
if count_only
|
| 278 |
-
else (
|
| 279 |
)
|
| 280 |
truncated_by = _derive_truncated_by(
|
| 281 |
self,
|
| 282 |
hard_cap=bool(limit_plan.get("hard_cap_applied")),
|
| 283 |
scan_limit_hit=scan_limit_hit,
|
| 284 |
page_limit_hit=page_limit_hit,
|
| 285 |
-
|
| 286 |
)
|
| 287 |
truncated = truncated_by != "none" or truncated_extra
|
| 288 |
total_value = _as_int(base_meta.get("total"))
|
|
|
|
| 21 |
def _derive_limit_metadata(
|
| 22 |
self: Any,
|
| 23 |
*,
|
| 24 |
+
requested_limit: int | None,
|
| 25 |
+
applied_limit: int,
|
| 26 |
default_limit_used: bool,
|
| 27 |
requested_scan_limit: int | None = None,
|
| 28 |
applied_scan_limit: int | None = None,
|
|
|
|
| 30 |
applied_max_pages: int | None = None,
|
| 31 |
) -> dict[str, Any]:
|
| 32 |
meta: dict[str, Any] = {
|
| 33 |
+
"requested_limit": requested_limit,
|
| 34 |
+
"applied_limit": applied_limit,
|
| 35 |
"default_limit_used": default_limit_used,
|
| 36 |
}
|
| 37 |
if requested_scan_limit is not None or applied_scan_limit is not None:
|
|
|
|
| 42 |
meta["requested_max_pages"] = requested_max_pages
|
| 43 |
meta["applied_max_pages"] = applied_max_pages
|
| 44 |
meta["page_limit_applied"] = requested_max_pages != applied_max_pages
|
| 45 |
+
if requested_limit is not None:
|
| 46 |
+
meta["hard_cap_applied"] = applied_limit < requested_limit
|
| 47 |
return meta
|
| 48 |
|
| 49 |
|
|
|
|
| 68 |
hard_cap: bool = False,
|
| 69 |
scan_limit_hit: bool = False,
|
| 70 |
page_limit_hit: bool = False,
|
| 71 |
+
limit_hit: bool = False,
|
| 72 |
) -> str:
|
| 73 |
+
causes = [hard_cap, scan_limit_hit, page_limit_hit, limit_hit]
|
| 74 |
if sum(1 for cause in causes if cause) > 1:
|
| 75 |
return "multiple"
|
| 76 |
if hard_cap:
|
|
|
|
| 79 |
return "scan_limit"
|
| 80 |
if page_limit_hit:
|
| 81 |
return "page_limit"
|
| 82 |
+
if limit_hit:
|
| 83 |
+
return "limit"
|
| 84 |
return "none"
|
| 85 |
|
| 86 |
|
|
|
|
| 89 |
) -> bool:
|
| 90 |
if sample_complete:
|
| 91 |
return False
|
| 92 |
+
return truncated_by in {"limit", "scan_limit", "page_limit", "multiple"}
|
| 93 |
|
| 94 |
|
| 95 |
def _derive_next_request_hint(
|
|
|
|
| 97 |
*,
|
| 98 |
truncated_by: str,
|
| 99 |
more_available: bool | str,
|
| 100 |
+
applied_limit: int,
|
| 101 |
applied_scan_limit: int | None = None,
|
| 102 |
applied_max_pages: int | None = None,
|
| 103 |
) -> str:
|
| 104 |
+
if truncated_by == "limit":
|
| 105 |
+
return f"Ask for limit>{applied_limit} to see more rows"
|
| 106 |
if truncated_by == "scan_limit" and applied_scan_limit is not None:
|
| 107 |
return f"Increase scan_limit above {applied_scan_limit} for broader coverage"
|
| 108 |
if truncated_by == "page_limit" and applied_max_pages is not None:
|
|
|
|
| 121 |
def _resolve_exhaustive_limits(
|
| 122 |
self: Any,
|
| 123 |
*,
|
| 124 |
+
limit: int | None,
|
| 125 |
count_only: bool,
|
| 126 |
+
default_limit: int,
|
| 127 |
+
max_limit: int,
|
| 128 |
scan_limit: int | None = None,
|
| 129 |
scan_cap: int | None = None,
|
| 130 |
) -> dict[str, Any]:
|
| 131 |
+
requested_limit = None if count_only else limit
|
| 132 |
+
effective_requested_limit = 0 if count_only else requested_limit
|
| 133 |
out: dict[str, Any] = {
|
| 134 |
+
"requested_limit": requested_limit,
|
| 135 |
+
"applied_limit": _clamp_int(
|
| 136 |
+
effective_requested_limit,
|
| 137 |
+
default=default_limit,
|
| 138 |
minimum=0,
|
| 139 |
+
maximum=max_limit,
|
| 140 |
),
|
| 141 |
+
"default_limit_used": requested_limit is None and not count_only,
|
| 142 |
}
|
| 143 |
out["hard_cap_applied"] = (
|
| 144 |
+
requested_limit is not None and out["applied_limit"] < requested_limit
|
|
|
|
| 145 |
)
|
| 146 |
if scan_cap is not None:
|
| 147 |
out["requested_scan_limit"] = scan_limit
|
|
|
|
| 167 |
applied_max_pages: int | None = None,
|
| 168 |
) -> dict[str, Any]:
|
| 169 |
meta = dict(base_meta)
|
| 170 |
+
applied_limit = int(limit_plan["applied_limit"])
|
| 171 |
applied_scan_limit = limit_plan.get("applied_scan_limit")
|
| 172 |
meta.update(
|
| 173 |
{
|
|
|
|
| 185 |
self,
|
| 186 |
truncated_by=truncated_by,
|
| 187 |
more_available=more_available,
|
| 188 |
+
applied_limit=applied_limit,
|
| 189 |
applied_scan_limit=applied_scan_limit
|
| 190 |
if isinstance(applied_scan_limit, int)
|
| 191 |
else None,
|
|
|
|
| 196 |
meta.update(
|
| 197 |
_derive_limit_metadata(
|
| 198 |
self,
|
| 199 |
+
requested_limit=limit_plan["requested_limit"],
|
| 200 |
+
applied_limit=applied_limit,
|
| 201 |
default_limit_used=bool(limit_plan["default_limit_used"]),
|
| 202 |
requested_scan_limit=limit_plan.get("requested_scan_limit"),
|
| 203 |
applied_scan_limit=applied_scan_limit
|
|
|
|
| 262 |
requested_max_pages: int | None = None,
|
| 263 |
applied_max_pages: int | None = None,
|
| 264 |
) -> dict[str, Any]:
|
| 265 |
+
applied_limit = int(limit_plan["applied_limit"])
|
| 266 |
if count_only:
|
| 267 |
effective_sample_complete = exact_count
|
| 268 |
else:
|
| 269 |
effective_sample_complete = (
|
| 270 |
sample_complete
|
| 271 |
if isinstance(sample_complete, bool)
|
| 272 |
+
else exact_count and matched_count <= applied_limit
|
| 273 |
)
|
| 274 |
+
limit_hit = (
|
| 275 |
False
|
| 276 |
if count_only
|
| 277 |
+
else (applied_limit > 0 and matched_count > applied_limit)
|
| 278 |
)
|
| 279 |
truncated_by = _derive_truncated_by(
|
| 280 |
self,
|
| 281 |
hard_cap=bool(limit_plan.get("hard_cap_applied")),
|
| 282 |
scan_limit_hit=scan_limit_hit,
|
| 283 |
page_limit_hit=page_limit_hit,
|
| 284 |
+
limit_hit=limit_hit,
|
| 285 |
)
|
| 286 |
truncated = truncated_by != "none" or truncated_extra
|
| 287 |
total_value = _as_int(base_meta.get("total"))
|
monty_api/runtime_filtering.py
CHANGED
|
@@ -2,40 +2,48 @@ from __future__ import annotations
|
|
| 2 |
|
| 3 |
from typing import Any
|
| 4 |
|
| 5 |
-
from .
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
| 13 |
)
|
| 14 |
from .http_runtime import _as_int
|
| 15 |
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
def _project_items(
|
| 18 |
self: Any,
|
| 19 |
items: list[dict[str, Any]],
|
| 20 |
fields: list[str] | None,
|
| 21 |
-
|
|
|
|
| 22 |
) -> list[dict[str, Any]]:
|
| 23 |
if not isinstance(fields, list) or not fields:
|
| 24 |
return items
|
| 25 |
wanted = [str(field).strip() for field in fields if str(field).strip()]
|
| 26 |
if not wanted:
|
| 27 |
return items
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
if
|
| 32 |
-
|
|
|
|
|
|
|
| 33 |
projected: list[dict[str, Any]] = []
|
| 34 |
for row in items:
|
| 35 |
out: dict[str, Any] = {}
|
| 36 |
for key in wanted:
|
| 37 |
-
|
| 38 |
-
value = row.get(source_key)
|
| 39 |
if value is None:
|
| 40 |
continue
|
| 41 |
out[key] = value
|
|
@@ -46,63 +54,88 @@ def _project_items(
|
|
| 46 |
def _project_repo_items(
|
| 47 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 48 |
) -> list[dict[str, Any]]:
|
| 49 |
-
return _project_items(self, items, fields,
|
| 50 |
|
| 51 |
|
| 52 |
def _project_collection_items(
|
| 53 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 54 |
) -> list[dict[str, Any]]:
|
| 55 |
-
return _project_items(
|
|
|
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
def _project_daily_paper_items(
|
| 59 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 60 |
) -> list[dict[str, Any]]:
|
| 61 |
-
return _project_items(
|
|
|
|
|
|
|
| 62 |
|
| 63 |
|
| 64 |
def _project_user_items(
|
| 65 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 66 |
) -> list[dict[str, Any]]:
|
| 67 |
-
return _project_items(self, items, fields,
|
| 68 |
|
| 69 |
|
| 70 |
def _project_actor_items(
|
| 71 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 72 |
) -> list[dict[str, Any]]:
|
| 73 |
-
return _project_items(self, items, fields,
|
| 74 |
|
| 75 |
|
| 76 |
def _project_user_like_items(
|
| 77 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 78 |
) -> list[dict[str, Any]]:
|
| 79 |
-
return _project_items(
|
|
|
|
|
|
|
| 80 |
|
| 81 |
|
| 82 |
def _project_activity_items(
|
| 83 |
self: Any, items: list[dict[str, Any]], fields: list[str] | None
|
| 84 |
) -> list[dict[str, Any]]:
|
| 85 |
-
return _project_items(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
|
| 88 |
def _normalize_where(
|
| 89 |
self: Any,
|
| 90 |
where: dict[str, Any] | None,
|
| 91 |
-
|
|
|
|
| 92 |
) -> dict[str, Any] | None:
|
| 93 |
if not isinstance(where, dict) or not where:
|
| 94 |
return where
|
| 95 |
-
|
| 96 |
-
str(key).strip().lower(): str(value).strip()
|
| 97 |
-
for key, value in (aliases or {}).items()
|
| 98 |
-
if str(key).strip() and str(value).strip()
|
| 99 |
-
}
|
| 100 |
normalized: dict[str, Any] = {}
|
| 101 |
for key, value in where.items():
|
| 102 |
raw_key = str(key).strip()
|
| 103 |
if not raw_key:
|
| 104 |
continue
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
return normalized
|
| 107 |
|
| 108 |
|
|
@@ -161,9 +194,9 @@ def _apply_where(
|
|
| 161 |
items: list[dict[str, Any]],
|
| 162 |
where: dict[str, Any] | None,
|
| 163 |
*,
|
| 164 |
-
|
| 165 |
) -> list[dict[str, Any]]:
|
| 166 |
-
normalized_where = _normalize_where(self, where,
|
| 167 |
if not isinstance(normalized_where, dict) or not normalized_where:
|
| 168 |
return items
|
| 169 |
return [row for row in items if _item_matches_where(self, row, normalized_where)]
|
|
|
|
| 2 |
|
| 3 |
from typing import Any
|
| 4 |
|
| 5 |
+
from .constants import (
|
| 6 |
+
ACTIVITY_CANONICAL_FIELDS,
|
| 7 |
+
ACTOR_CANONICAL_FIELDS,
|
| 8 |
+
COLLECTION_CANONICAL_FIELDS,
|
| 9 |
+
DAILY_PAPER_CANONICAL_FIELDS,
|
| 10 |
+
DISCUSSION_CANONICAL_FIELDS,
|
| 11 |
+
DISCUSSION_DETAIL_CANONICAL_FIELDS,
|
| 12 |
+
REPO_CANONICAL_FIELDS,
|
| 13 |
+
USER_CANONICAL_FIELDS,
|
| 14 |
+
USER_LIKES_CANONICAL_FIELDS,
|
| 15 |
)
|
| 16 |
from .http_runtime import _as_int
|
| 17 |
|
| 18 |
|
| 19 |
+
def _allowed_field_set(allowed_fields: tuple[str, ...] | list[str] | set[str]) -> set[str]:
|
| 20 |
+
return {str(field).strip() for field in allowed_fields if str(field).strip()}
|
| 21 |
+
|
| 22 |
+
|
| 23 |
def _project_items(
|
| 24 |
self: Any,
|
| 25 |
items: list[dict[str, Any]],
|
| 26 |
fields: list[str] | None,
|
| 27 |
+
*,
|
| 28 |
+
allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
|
| 29 |
) -> list[dict[str, Any]]:
|
| 30 |
if not isinstance(fields, list) or not fields:
|
| 31 |
return items
|
| 32 |
wanted = [str(field).strip() for field in fields if str(field).strip()]
|
| 33 |
if not wanted:
|
| 34 |
return items
|
| 35 |
+
if allowed_fields is not None:
|
| 36 |
+
allowed = _allowed_field_set(allowed_fields)
|
| 37 |
+
invalid = sorted(field for field in wanted if field not in allowed)
|
| 38 |
+
if invalid:
|
| 39 |
+
raise ValueError(
|
| 40 |
+
f"Unsupported fields {invalid}. Allowed fields: {sorted(allowed)}"
|
| 41 |
+
)
|
| 42 |
projected: list[dict[str, Any]] = []
|
| 43 |
for row in items:
|
| 44 |
out: dict[str, Any] = {}
|
| 45 |
for key in wanted:
|
| 46 |
+
value = row.get(key)
|
|
|
|
| 47 |
if value is None:
|
| 48 |
continue
|
| 49 |
out[key] = value
|
|
|
|
| 54 |
def _project_repo_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Delegate to ``_project_items`` using ``REPO_CANONICAL_FIELDS`` as the allow-list."""
    repo_allow_list = REPO_CANONICAL_FIELDS
    return _project_items(self, items, fields, allowed_fields=repo_allow_list)
|
| 58 |
|
| 59 |
|
| 60 |
def _project_collection_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Delegate to ``_project_items`` using ``COLLECTION_CANONICAL_FIELDS`` as the allow-list."""
    collection_allow_list = COLLECTION_CANONICAL_FIELDS
    return _project_items(self, items, fields, allowed_fields=collection_allow_list)
|
| 66 |
|
| 67 |
|
| 68 |
def _project_daily_paper_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Delegate to ``_project_items`` using ``DAILY_PAPER_CANONICAL_FIELDS`` as the allow-list."""
    daily_paper_allow_list = DAILY_PAPER_CANONICAL_FIELDS
    return _project_items(self, items, fields, allowed_fields=daily_paper_allow_list)
|
| 74 |
|
| 75 |
|
| 76 |
def _project_user_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Delegate to ``_project_items`` using ``USER_CANONICAL_FIELDS`` as the allow-list."""
    user_allow_list = USER_CANONICAL_FIELDS
    return _project_items(self, items, fields, allowed_fields=user_allow_list)
|
| 80 |
|
| 81 |
|
| 82 |
def _project_actor_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Delegate to ``_project_items`` using ``ACTOR_CANONICAL_FIELDS`` as the allow-list."""
    actor_allow_list = ACTOR_CANONICAL_FIELDS
    return _project_items(self, items, fields, allowed_fields=actor_allow_list)
|
| 86 |
|
| 87 |
|
| 88 |
def _project_user_like_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Delegate to ``_project_items`` using ``USER_LIKES_CANONICAL_FIELDS`` as the allow-list."""
    user_likes_allow_list = USER_LIKES_CANONICAL_FIELDS
    return _project_items(self, items, fields, allowed_fields=user_likes_allow_list)
|
| 94 |
|
| 95 |
|
| 96 |
def _project_activity_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Delegate to ``_project_items`` using ``ACTIVITY_CANONICAL_FIELDS`` as the allow-list."""
    activity_allow_list = ACTIVITY_CANONICAL_FIELDS
    return _project_items(self, items, fields, allowed_fields=activity_allow_list)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def _project_discussion_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Delegate to ``_project_items`` using ``DISCUSSION_CANONICAL_FIELDS`` as the allow-list."""
    discussion_allow_list = DISCUSSION_CANONICAL_FIELDS
    return _project_items(self, items, fields, allowed_fields=discussion_allow_list)
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def _project_discussion_detail_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Delegate to ``_project_items`` using ``DISCUSSION_DETAIL_CANONICAL_FIELDS`` as the allow-list."""
    detail_allow_list = DISCUSSION_DETAIL_CANONICAL_FIELDS
    return _project_items(self, items, fields, allowed_fields=detail_allow_list)
|
| 118 |
|
| 119 |
|
| 120 |
def _normalize_where(
|
| 121 |
self: Any,
|
| 122 |
where: dict[str, Any] | None,
|
| 123 |
+
*,
|
| 124 |
+
allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
|
| 125 |
) -> dict[str, Any] | None:
|
| 126 |
if not isinstance(where, dict) or not where:
|
| 127 |
return where
|
| 128 |
+
allowed = _allowed_field_set(allowed_fields) if allowed_fields is not None else None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
normalized: dict[str, Any] = {}
|
| 130 |
for key, value in where.items():
|
| 131 |
raw_key = str(key).strip()
|
| 132 |
if not raw_key:
|
| 133 |
continue
|
| 134 |
+
if allowed is not None and raw_key not in allowed:
|
| 135 |
+
raise ValueError(
|
| 136 |
+
f"Unsupported filter fields {[raw_key]}. Allowed fields: {sorted(allowed)}"
|
| 137 |
+
)
|
| 138 |
+
normalized[raw_key] = value
|
| 139 |
return normalized
|
| 140 |
|
| 141 |
|
|
|
|
| 194 |
items: list[dict[str, Any]],
|
| 195 |
where: dict[str, Any] | None,
|
| 196 |
*,
|
| 197 |
+
allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
|
| 198 |
) -> list[dict[str, Any]]:
|
| 199 |
+
normalized_where = _normalize_where(self, where, allowed_fields=allowed_fields)
|
| 200 |
if not isinstance(normalized_where, dict) or not normalized_where:
|
| 201 |
return items
|
| 202 |
return [row for row in items if _item_matches_where(self, row, normalized_where)]
|
monty_api/tool_entrypoints.py
CHANGED
|
@@ -5,6 +5,7 @@ from __future__ import annotations
|
|
| 5 |
|
| 6 |
import sys
|
| 7 |
from pathlib import Path
|
|
|
|
| 8 |
|
| 9 |
_PACKAGE_DIR = Path(__file__).resolve().parent
|
| 10 |
_ROOT_DIR = _PACKAGE_DIR.parent
|
|
@@ -13,7 +14,40 @@ for candidate in (_ROOT_DIR, _PACKAGE_DIR):
|
|
| 13 |
if candidate_str not in sys.path:
|
| 14 |
sys.path.insert(0, candidate_str)
|
| 15 |
|
| 16 |
-
from monty_api import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
__all__ = [
|
| 19 |
"HELPER_EXTERNALS",
|
|
|
|
| 5 |
|
| 6 |
import sys
|
| 7 |
from pathlib import Path
|
| 8 |
+
from typing import Any
|
| 9 |
|
| 10 |
_PACKAGE_DIR = Path(__file__).resolve().parent
|
| 11 |
_ROOT_DIR = _PACKAGE_DIR.parent
|
|
|
|
| 14 |
if candidate_str not in sys.path:
|
| 15 |
sys.path.insert(0, candidate_str)
|
| 16 |
|
| 17 |
+
from monty_api import ( # noqa: E402
|
| 18 |
+
HELPER_EXTERNALS,
|
| 19 |
+
hf_hub_query as _hf_hub_query,
|
| 20 |
+
hf_hub_query_raw as _hf_hub_query_raw,
|
| 21 |
+
main,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
async def hf_hub_query(
    query: str,
    code: str,
    max_calls: int | None = None,
    timeout_sec: int | None = None,
) -> dict[str, Any]:
    """Await the ``hf_hub_query`` coroutine imported from ``monty_api`` and return its result.

    All four arguments are forwarded by keyword, unchanged.
    """
    forwarded = {
        "query": query,
        "code": code,
        "max_calls": max_calls,
        "timeout_sec": timeout_sec,
    }
    return await _hf_hub_query(**forwarded)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
async def hf_hub_query_raw(
    query: str,
    code: str,
    max_calls: int | None = None,
    timeout_sec: int | None = None,
) -> Any:
    """Await the ``hf_hub_query_raw`` coroutine imported from ``monty_api`` and return its result.

    All four arguments are forwarded by keyword, unchanged.
    """
    forwarded = {
        "query": query,
        "code": code,
        "max_calls": max_calls,
        "timeout_sec": timeout_sec,
    }
    return await _hf_hub_query_raw(**forwarded)
|
| 51 |
|
| 52 |
__all__ = [
|
| 53 |
"HELPER_EXTERNALS",
|
monty_api/validation.py
CHANGED
|
@@ -155,8 +155,8 @@ def _summarize_limit_hit(helper_name: str, result: Any) -> dict[str, Any] | None
|
|
| 155 |
"truncated": meta.get("truncated"),
|
| 156 |
"truncated_by": meta.get("truncated_by"),
|
| 157 |
"more_available": meta.get("more_available"),
|
| 158 |
-
"
|
| 159 |
-
"
|
| 160 |
"next_request_hint": meta.get("next_request_hint"),
|
| 161 |
}
|
| 162 |
if meta.get("scan_limit") is not None:
|
|
|
|
| 155 |
"truncated": meta.get("truncated"),
|
| 156 |
"truncated_by": meta.get("truncated_by"),
|
| 157 |
"more_available": meta.get("more_available"),
|
| 158 |
+
"requested_limit": meta.get("requested_limit"),
|
| 159 |
+
"applied_limit": meta.get("applied_limit"),
|
| 160 |
"next_request_hint": meta.get("next_request_hint"),
|
| 161 |
}
|
| 162 |
if meta.get("scan_limit") is not None:
|
tool_entrypoints.py
CHANGED
|
@@ -5,6 +5,7 @@ from __future__ import annotations
|
|
| 5 |
|
| 6 |
import sys
|
| 7 |
from pathlib import Path
|
|
|
|
| 8 |
|
| 9 |
_PACKAGE_DIR = Path(__file__).resolve().parent
|
| 10 |
_ROOT_DIR = _PACKAGE_DIR.parent
|
|
@@ -13,7 +14,40 @@ for candidate in (_ROOT_DIR, _PACKAGE_DIR):
|
|
| 13 |
if candidate_str not in sys.path:
|
| 14 |
sys.path.insert(0, candidate_str)
|
| 15 |
|
| 16 |
-
from monty_api import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
__all__ = [
|
| 19 |
"HELPER_EXTERNALS",
|
|
|
|
| 5 |
|
| 6 |
import sys
|
| 7 |
from pathlib import Path
|
| 8 |
+
from typing import Any
|
| 9 |
|
| 10 |
_PACKAGE_DIR = Path(__file__).resolve().parent
|
| 11 |
_ROOT_DIR = _PACKAGE_DIR.parent
|
|
|
|
| 14 |
if candidate_str not in sys.path:
|
| 15 |
sys.path.insert(0, candidate_str)
|
| 16 |
|
| 17 |
+
from monty_api import ( # noqa: E402
|
| 18 |
+
HELPER_EXTERNALS,
|
| 19 |
+
hf_hub_query as _hf_hub_query,
|
| 20 |
+
hf_hub_query_raw as _hf_hub_query_raw,
|
| 21 |
+
main,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
async def hf_hub_query(
    query: str,
    code: str,
    max_calls: int | None = None,
    timeout_sec: int | None = None,
) -> dict[str, Any]:
    """Await the ``hf_hub_query`` coroutine imported from ``monty_api`` and return its result.

    All four arguments are forwarded by keyword, unchanged.
    """
    forwarded = {
        "query": query,
        "code": code,
        "max_calls": max_calls,
        "timeout_sec": timeout_sec,
    }
    return await _hf_hub_query(**forwarded)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
async def hf_hub_query_raw(
    query: str,
    code: str,
    max_calls: int | None = None,
    timeout_sec: int | None = None,
) -> Any:
    """Await the ``hf_hub_query_raw`` coroutine imported from ``monty_api`` and return its result.

    All four arguments are forwarded by keyword, unchanged.
    """
    forwarded = {
        "query": query,
        "code": code,
        "max_calls": max_calls,
        "timeout_sec": timeout_sec,
    }
    return await _hf_hub_query_raw(**forwarded)
|
| 51 |
|
| 52 |
__all__ = [
|
| 53 |
"HELPER_EXTERNALS",
|