Spaces:

evalstate
/

hf-hub-query

Running

App Files Files Community

evalstate HF Staff committed on Mar 20

Commit

cdf6171

verified ·

1 Parent(s): f687774

Deploy committed Monty runtime refactor

Browse files

Files changed (12) hide show

hf-hub-query.md +2 -2
monty_api/aliases.py +0 -88
monty_api/helpers/activity.py +28 -12
monty_api/helpers/profiles.py +92 -51
monty_api/http_runtime.py +88 -16
monty_api/query_entrypoints.py +27 -0
monty_api/runtime_context.py +4 -0
monty_api/runtime_envelopes.py +35 -36
monty_api/runtime_filtering.py +65 -32
monty_api/tool_entrypoints.py +35 -1
monty_api/validation.py +2 -2
tool_entrypoints.py +35 -1

hf-hub-query.md CHANGED Viewed

@@ -4,7 +4,7 @@ name: hf_hub_query
 model: hf.openai/gpt-oss-120b:sambanova
 use_history: false
 default: true
-description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound return_limit, scan_limit, and max_pages for brevity or broader coverage, and the tool can also be asked about its supported helpers, fields, aliases, defaults, and coverage behavior."
 shell: false
 skills: []
 function_tools:
@@ -32,7 +32,7 @@ The user must never see your generated Python unless they explicitly ask for deb
 - The return value of `solve(...)` is the user-facing payload.
 - Return a dict/list when JSON is appropriate; return a string/number/bool only when that scalar is the intended payload.
 - For composed structured outputs that include your own coverage metadata, always use the exact top-level keys `results` and `coverage` unless the user explicitly asked for different key names.
-- Do **not** rename `results` to `likes`, `liked_models`, `items`, `rows`, or similar in those composed outputs.
 - Runtime will wrap the `solve(...)` return value under `result` and attach runtime information under `meta`.
 - When helper-owned coverage metadata matters, prefer returning the helper envelope directly.
 - Do **not** create your own transport wrapper such as `{result: ..., meta: ...}` inside `solve(...)`.

 model: hf.openai/gpt-oss-120b:sambanova
 use_history: false
 default: true
+description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound limit, scan_limit, and max_pages for brevity or broader coverage, and the tool can also be asked about its supported helpers, canonical fields, defaults, and coverage behavior."
 shell: false
 skills: []
 function_tools:
 - The return value of `solve(...)` is the user-facing payload.
 - Return a dict/list when JSON is appropriate; return a string/number/bool only when that scalar is the intended payload.
 - For composed structured outputs that include your own coverage metadata, always use the exact top-level keys `results` and `coverage` unless the user explicitly asked for different key names.
+- Prefer returning outputs directly unless post-processing is required. Do **NOT** rename fields unless asked specifically.
 - Runtime will wrap the `solve(...)` return value under `result` and attach runtime information under `meta`.
 - When helper-owned coverage metadata matters, prefer returning the helper envelope directly.
 - Do **not** create your own transport wrapper such as `{result: ..., meta: ...}` inside `solve(...)`.

monty_api/aliases.py CHANGED Viewed

@@ -29,91 +29,3 @@ REPO_SORT_KEYS: dict[str, set[str]] = {
         "trending_score",
     },
 }
-# Alias policy:
-# - canonical names stay canonical
-# - support a small compatibility set for observed prompt/output variants
-# - do not add speculative synonyms unless they appear in prompts, evals, or
-#   upstream payloads we already normalize
-SORT_KEY_ALIASES: dict[str, str] = {
-    "createdat": "created_at",
-    "created_at": "created_at",
-    "created-at": "created_at",
-    "downloads": "downloads",
-    "likes": "likes",
-    "lastmodified": "last_modified",
-    "last_modified": "last_modified",
-    "last-modified": "last_modified",
-    "trendingscore": "trending_score",
-    "trending_score": "trending_score",
-    "trending-score": "trending_score",
-    "trending": "trending_score",
-}
-USER_FIELD_ALIASES: dict[str, str] = {
-    "login": "username",
-    "user": "username",
-    "handle": "username",
-    "name": "fullname",
-    "full_name": "fullname",
-    "is_pro": "isPro",
-    "pro": "isPro",
-}
-ACTOR_FIELD_ALIASES: dict[str, str] = {
-    **USER_FIELD_ALIASES,
-    "entity_type": "type",
-    "user_type": "type",
-}
-REPO_FIELD_ALIASES: dict[str, str] = {
-    "repoid": "repo_id",
-    "repotype": "repo_type",
-    "repourl": "repo_url",
-    "createdat": "created_at",
-    "lastmodified": "last_modified",
-    "pipelinetag": "pipeline_tag",
-    "numparams": "num_params",
-    "trendingrank": "trending_rank",
-    "trendingscore": "trending_score",
-    "libraryname": "library_name",
-    "paperswithcodeid": "paperswithcode_id",
-}
-COLLECTION_FIELD_ALIASES: dict[str, str] = {
-    "collectionid": "collection_id",
-    "lastupdated": "last_updated",
-    "ownertype": "owner_type",
-    "itemcount": "item_count",
-    "author": "owner",
-}
-DAILY_PAPER_FIELD_ALIASES: dict[str, str] = {
-    "paperid": "paper_id",
-    "publishedat": "published_at",
-    "submittedondailyat": "submitted_on_daily_at",
-    "submittedby": "submitted_by",
-    "discussionid": "discussion_id",
-    "githubrepo": "github_repo_url",
-    "githubstars": "github_stars",
-    "projectpage": "project_page_url",
-    "numcomments": "num_comments",
-    "isauthorparticipating": "is_author_participating",
-    "repoid": "repo_id",
-}
-USER_LIKES_FIELD_ALIASES: dict[str, str] = {
-    "likedat": "liked_at",
-    "repoid": "repo_id",
-    "repotype": "repo_type",
-    "repoauthor": "repo_author",
-    "repolikes": "repo_likes",
-    "repodownloads": "repo_downloads",
-}
-ACTIVITY_FIELD_ALIASES: dict[str, str] = {
-    "time": "timestamp",
-    "type": "event_type",
-    "repoid": "repo_id",
-    "repotype": "repo_type",
-}

         "trending_score",
     },
 }

monty_api/helpers/activity.py CHANGED Viewed

@@ -4,8 +4,8 @@ from __future__ import annotations
 from functools import partial
 from typing import Any, Callable
-from ..aliases import ACTIVITY_FIELD_ALIASES
 from ..constants import (
     EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
     RECENT_ACTIVITY_PAGE_SIZE,
     RECENT_ACTIVITY_SCAN_MAX_PAGES,
@@ -19,7 +19,7 @@ async def hf_recent_activity(
     entity: str | None = None,
     activity_types: list[str] | None = None,
     repo_types: list[str] | None = None,
-    return_limit: int | None = None,
     max_pages: int | None = None,
     start_cursor: str | None = None,
     count_only: bool = False,
@@ -27,7 +27,7 @@ async def hf_recent_activity(
     fields: list[str] | None = None,
 ) -> dict[str, Any]:
     start_calls = ctx.call_count["n"]
-    default_return = ctx._policy_int("hf_recent_activity", "default_return", 100)
     page_cap = ctx._policy_int(
         "hf_recent_activity", "page_limit", RECENT_ACTIVITY_PAGE_SIZE
     )
@@ -56,12 +56,12 @@ async def hf_recent_activity(
             error="entity is required",
         )
     limit_plan = ctx._resolve_exhaustive_limits(
-        return_limit=return_limit,
         count_only=count_only,
-        default_return=default_return,
-        max_return=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
     )
-    ret_lim = int(limit_plan["applied_return_limit"])
     page_lim = page_cap
     pages_lim = ctx._clamp_int(
         requested_max_pages, default=pages_cap, minimum=1, maximum=pages_cap
@@ -85,8 +85,17 @@ async def hf_recent_activity(
     pages = 0
     exhausted_feed = False
     stopped_for_budget = False
-    normalized_where = ctx._normalize_where(where, aliases=ACTIVITY_FIELD_ALIASES)
-    while pages < pages_lim and (ret_lim == 0 or len(items) < ret_lim):
         if ctx._budget_remaining() <= 0:
             stopped_for_budget = True
             break
@@ -147,15 +156,22 @@ async def hf_recent_activity(
             if not ctx._item_matches_where(item, normalized_where):
                 continue
             matched += 1
-            if len(items) < ret_lim:
                 items.append(item)
         if not next_cursor:
             exhausted_feed = True
             break
-    items = ctx._project_activity_items(items, fields)
     exact_count = exhausted_feed and (not stopped_for_budget)
     sample_complete = (
-        exact_count and ret_lim >= matched and (not count_only or matched == 0)
     )
     page_limit_hit = (
         next_cursor is not None and pages >= pages_lim and (not exhausted_feed)

 from functools import partial
 from typing import Any, Callable
 from ..constants import (
+    ACTIVITY_CANONICAL_FIELDS,
     EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
     RECENT_ACTIVITY_PAGE_SIZE,
     RECENT_ACTIVITY_SCAN_MAX_PAGES,
     entity: str | None = None,
     activity_types: list[str] | None = None,
     repo_types: list[str] | None = None,
+    limit: int | None = None,
     max_pages: int | None = None,
     start_cursor: str | None = None,
     count_only: bool = False,
     fields: list[str] | None = None,
 ) -> dict[str, Any]:
     start_calls = ctx.call_count["n"]
+    default_limit = ctx._policy_int("hf_recent_activity", "default_limit", 100)
     page_cap = ctx._policy_int(
         "hf_recent_activity", "page_limit", RECENT_ACTIVITY_PAGE_SIZE
     )
             error="entity is required",
         )
     limit_plan = ctx._resolve_exhaustive_limits(
+        limit=limit,
         count_only=count_only,
+        default_limit=default_limit,
+        max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
     )
+    applied_limit = int(limit_plan["applied_limit"])
     page_lim = page_cap
     pages_lim = ctx._clamp_int(
         requested_max_pages, default=pages_cap, minimum=1, maximum=pages_cap
     pages = 0
     exhausted_feed = False
     stopped_for_budget = False
+    try:
+        normalized_where = ctx._normalize_where(
+            where, allowed_fields=ACTIVITY_CANONICAL_FIELDS
+        )
+    except ValueError as exc:
+        return ctx._helper_error(
+            start_calls=start_calls,
+            source="/api/recent-activity",
+            error=exc,
+        )
+    while pages < pages_lim and (applied_limit == 0 or len(items) < applied_limit):
         if ctx._budget_remaining() <= 0:
             stopped_for_budget = True
             break
             if not ctx._item_matches_where(item, normalized_where):
                 continue
             matched += 1
+            if len(items) < applied_limit:
                 items.append(item)
         if not next_cursor:
             exhausted_feed = True
             break
+    try:
+        items = ctx._project_activity_items(items, fields)
+    except ValueError as exc:
+        return ctx._helper_error(
+            start_calls=start_calls,
+            source="/api/recent-activity",
+            error=exc,
+        )
     exact_count = exhausted_feed and (not stopped_for_budget)
     sample_complete = (
+        exact_count and applied_limit >= matched and (not count_only or matched == 0)
     )
     page_limit_hit = (
         next_cursor is not None and pages >= pages_lim and (not exhausted_feed)

monty_api/helpers/profiles.py CHANGED Viewed

@@ -5,11 +5,8 @@ from itertools import islice
 import re
 from typing import Any, Callable
 from ..context_types import HelperRuntimeContext
-from ..aliases import (
-    ACTOR_FIELD_ALIASES,
-    USER_FIELD_ALIASES,
-)
 from ..constants import (
     EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
     GRAPH_SCAN_LIMIT_CAP,
     OUTPUT_ITEMS_TRUNCATION_LIMIT,
@@ -74,7 +71,7 @@ async def hf_whoami(ctx: HelperRuntimeContext) -> dict[str, Any]:
     item = {
         "username": username,
         "fullname": payload.get("fullname"),
-        "isPro": payload.get("isPro"),
     }
     items = [item] if isinstance(username, str) and username else []
     return ctx._helper_success(
@@ -148,16 +145,16 @@ async def _hf_user_overview(ctx: HelperRuntimeContext, username: str) -> dict[st
         "username": obj.username or u,
         "fullname": obj.fullname,
         "bio": getattr(obj, "details", None),
-        "avatarUrl": obj.avatar_url,
-        "websiteUrl": getattr(obj, "websiteUrl", None),
         "twitter": _social_url("twitter", twitter_handle),
         "github": _social_url("github", github_handle),
         "linkedin": _social_url("linkedin", linkedin_handle),
         "bluesky": _social_url("bluesky", bluesky_handle),
-        "twitterHandle": twitter_handle,
-        "githubHandle": github_handle,
-        "linkedinHandle": linkedin_handle,
-        "blueskyHandle": bluesky_handle,
         "followers": ctx._as_int(obj.num_followers),
         "following": ctx._as_int(obj.num_following),
         "likes": ctx._as_int(obj.num_likes),
@@ -168,7 +165,7 @@ async def _hf_user_overview(ctx: HelperRuntimeContext, username: str) -> dict[st
         "papers": ctx._as_int(getattr(obj, "num_papers", None)),
         "upvotes": ctx._as_int(getattr(obj, "num_upvotes", None)),
         "orgs": org_names,
-        "isPro": obj.is_pro,
     }
     return ctx._helper_success(
         start_calls=start_calls,
@@ -202,10 +199,10 @@ async def _hf_org_overview(
         return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
     item = {
         "organization": obj.name or org,
-        "displayName": obj.fullname,
-        "avatarUrl": obj.avatar_url,
         "description": obj.details,
-        "websiteUrl": getattr(obj, "websiteUrl", None),
         "followers": ctx._as_int(obj.num_followers),
         "members": ctx._as_int(obj.num_users),
         "models": ctx._as_int(getattr(obj, "num_models", None)),
@@ -226,7 +223,7 @@ async def _hf_org_overview(
 async def hf_org_members(
     ctx: HelperRuntimeContext,
     organization: str,
-    return_limit: int | None = None,
     scan_limit: int | None = None,
     count_only: bool = False,
     where: dict[str, Any] | None = None,
@@ -240,17 +237,17 @@ async def hf_org_members(
             source="/api/organizations/<o>/members",
             error="organization is required",
         )
-    default_return = ctx._policy_int("hf_org_members", "default_return", 100)
     scan_cap = ctx._policy_int("hf_org_members", "scan_max", GRAPH_SCAN_LIMIT_CAP)
     limit_plan = ctx._resolve_exhaustive_limits(
-        return_limit=return_limit,
         count_only=count_only,
-        default_return=default_return,
-        max_return=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
         scan_limit=scan_limit,
         scan_cap=scan_cap,
     )
-    ret_lim = int(limit_plan["applied_return_limit"])
     scan_lim = int(limit_plan["applied_scan_limit"])
     has_where = isinstance(where, dict) and bool(where)
     overview_total: int | None = None
@@ -299,11 +296,21 @@ async def hf_org_members(
         item = {
             "username": handle,
             "fullname": getattr(row, "fullname", None),
-            "isPro": getattr(row, "is_pro", None),
             "role": getattr(row, "role", None),
         }
         normalized.append(item)
-    normalized = ctx._apply_where(normalized, where, aliases=ACTOR_FIELD_ALIASES)
     observed_total = len(rows)
     scan_exhaustive = observed_total < scan_lim
     overview_list_mismatch = (
@@ -324,14 +331,14 @@ async def hf_org_members(
         total = observed_total
         total_matched = observed_total
     total_available = overview_total if overview_total is not None else observed_total
-    items = normalized[:ret_lim]
     scan_limit_hit = not exact_count and observed_total >= scan_lim
     count_source = (
         "overview" if overview_total is not None and (not has_where) else "scan"
     )
     sample_complete = (
         exact_count
-        and len(normalized) <= ret_lim
         and (not count_only or len(normalized) == 0)
     )
     more_available = ctx._derive_more_available(
@@ -342,7 +349,15 @@ async def hf_org_members(
     )
     if not exact_count and scan_limit_hit:
         more_available = "unknown" if has_where else True
-    items = ctx._project_user_items(items, fields)
     meta = ctx._build_exhaustive_result_meta(
         base_meta={
             "scanned": observed_total,
@@ -375,7 +390,7 @@ async def _user_graph_helper(
     kind: str,
     username: str,
     pro_only: bool | None,
-    return_limit: int | None,
     scan_limit: int | None,
     count_only: bool,
     where: dict[str, Any] | None,
@@ -384,10 +399,10 @@ async def _user_graph_helper(
     helper_name: str,
 ) -> dict[str, Any]:
     start_calls = ctx.call_count["n"]
-    default_return = ctx._policy_int(helper_name, "default_return", 100)
     scan_cap = ctx._policy_int(helper_name, "scan_max", GRAPH_SCAN_LIMIT_CAP)
-    max_return = ctx._policy_int(
-        helper_name, "max_return", EXHAUSTIVE_HELPER_RETURN_HARD_CAP
     )
     u = str(username or "").strip()
     if not u:
@@ -397,14 +412,14 @@ async def _user_graph_helper(
             error="username is required",
         )
     limit_plan = ctx._resolve_exhaustive_limits(
-        return_limit=return_limit,
         count_only=count_only,
-        default_return=default_return,
-        max_return=max_return,
         scan_limit=scan_limit,
         scan_cap=scan_cap,
     )
-    ret_lim = int(limit_plan["applied_return_limit"])
     scan_lim = int(limit_plan["applied_scan_limit"])
     has_where = isinstance(where, dict) and bool(where)
     filtered = pro_only is not None or has_where
@@ -509,14 +524,28 @@ async def _user_graph_helper(
         item = {
             "username": handle,
             "fullname": getattr(row, "fullname", None),
-            "isPro": getattr(row, "is_pro", None),
         }
-        if pro_only is True and item.get("isPro") is not True:
             continue
-        if pro_only is False and item.get("isPro") is True:
             continue
         normalized.append(item)
-    normalized = ctx._apply_where(normalized, where, aliases=USER_FIELD_ALIASES)
     observed_total = len(rows)
     scan_exhaustive = observed_total < scan_lim
     overview_list_mismatch = (
@@ -537,14 +566,14 @@ async def _user_graph_helper(
         total = observed_total
         total_matched = observed_total
     total_available = overview_total if overview_total is not None else observed_total
-    items = normalized[:ret_lim]
     scan_limit_hit = not exact_count and observed_total >= scan_lim
     count_source = (
         "overview" if overview_total is not None and (not filtered) else "scan"
     )
     sample_complete = (
         exact_count
-        and len(normalized) <= ret_lim
         and (not count_only or len(normalized) == 0)
     )
     more_available = ctx._derive_more_available(
@@ -555,7 +584,19 @@ async def _user_graph_helper(
     )
     if not exact_count and scan_limit_hit:
         more_available = "unknown" if filtered else True
-    items = ctx._project_user_items(items, fields)
     meta = ctx._build_exhaustive_result_meta(
         base_meta={
             "scanned": observed_total,
@@ -645,8 +686,8 @@ async def hf_profile_summary(
             "display_name": overview_item.get("fullname")
             or str(overview_item.get("username") or resolved_handle),
             "bio": overview_item.get("bio"),
-            "avatar_url": overview_item.get("avatarUrl"),
-            "website_url": overview_item.get("websiteUrl"),
             "twitter_url": overview_item.get("twitter"),
             "github_url": overview_item.get("github"),
             "linkedin_url": overview_item.get("linkedin"),
@@ -661,13 +702,13 @@ async def hf_profile_summary(
             "papers_count": ctx._overview_count(overview_item, "papers"),
             "upvotes_count": ctx._overview_count(overview_item, "upvotes"),
             "organizations": overview_item.get("orgs"),
-            "is_pro": overview_item.get("isPro"),
         }
         if "likes" in requested_sections:
             likes = await ctx.call_helper(
                 "hf_user_likes",
                 username=resolved_handle,
-                return_limit=likes_lim,
                 scan_limit=USER_SUMMARY_LIKES_SCAN_LIMIT,
                 count_only=likes_lim == 0,
                 sort="liked_at",
@@ -689,7 +730,7 @@ async def hf_profile_summary(
                 "hf_recent_activity",
                 feed_type="user",
                 entity=resolved_handle,
-                return_limit=activity_lim,
                 max_pages=USER_SUMMARY_ACTIVITY_MAX_PAGES,
                 count_only=activity_lim == 0,
                 fields=["timestamp", "event_type", "repo_type", "repo_id"],
@@ -724,11 +765,11 @@ async def hf_profile_summary(
         item = {
             "handle": str(overview_item.get("organization") or resolved_handle),
             "entity_type": "organization",
-            "display_name": overview_item.get("displayName")
             or str(overview_item.get("organization") or resolved_handle),
             "description": overview_item.get("description"),
-            "avatar_url": overview_item.get("avatarUrl"),
-            "website_url": overview_item.get("websiteUrl"),
             "followers_count": ctx._overview_count(overview_item, "followers"),
             "members_count": ctx._overview_count(overview_item, "members"),
             "models_count": ctx._overview_count(overview_item, "models"),
@@ -765,7 +806,7 @@ async def hf_user_graph(
     ctx: HelperRuntimeContext,
     username: str | None = None,
     relation: str = "followers",
-    return_limit: int | None = None,
     scan_limit: int | None = None,
     count_only: bool = False,
     pro_only: bool | None = None,
@@ -800,7 +841,7 @@ async def hf_user_graph(
         rel,
         resolved_username,
         pro_only,
-        return_limit,
         scan_limit,
         count_only,
         where,

 import re
 from typing import Any, Callable
 from ..context_types import HelperRuntimeContext
 from ..constants import (
+    ACTOR_CANONICAL_FIELDS,
     EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
     GRAPH_SCAN_LIMIT_CAP,
     OUTPUT_ITEMS_TRUNCATION_LIMIT,
     item = {
         "username": username,
         "fullname": payload.get("fullname"),
+        "is_pro": payload.get("isPro"),
     }
     items = [item] if isinstance(username, str) and username else []
     return ctx._helper_success(
         "username": obj.username or u,
         "fullname": obj.fullname,
         "bio": getattr(obj, "details", None),
+        "avatar_url": obj.avatar_url,
+        "website_url": getattr(obj, "websiteUrl", None),
         "twitter": _social_url("twitter", twitter_handle),
         "github": _social_url("github", github_handle),
         "linkedin": _social_url("linkedin", linkedin_handle),
         "bluesky": _social_url("bluesky", bluesky_handle),
+        "twitter_handle": twitter_handle,
+        "github_handle": github_handle,
+        "linkedin_handle": linkedin_handle,
+        "bluesky_handle": bluesky_handle,
         "followers": ctx._as_int(obj.num_followers),
         "following": ctx._as_int(obj.num_following),
         "likes": ctx._as_int(obj.num_likes),
         "papers": ctx._as_int(getattr(obj, "num_papers", None)),
         "upvotes": ctx._as_int(getattr(obj, "num_upvotes", None)),
         "orgs": org_names,
+        "is_pro": obj.is_pro,
     }
     return ctx._helper_success(
         start_calls=start_calls,
         return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
     item = {
         "organization": obj.name or org,
+        "display_name": obj.fullname,
+        "avatar_url": obj.avatar_url,
         "description": obj.details,
+        "website_url": getattr(obj, "websiteUrl", None),
         "followers": ctx._as_int(obj.num_followers),
         "members": ctx._as_int(obj.num_users),
         "models": ctx._as_int(getattr(obj, "num_models", None)),
 async def hf_org_members(
     ctx: HelperRuntimeContext,
     organization: str,
+    limit: int | None = None,
     scan_limit: int | None = None,
     count_only: bool = False,
     where: dict[str, Any] | None = None,
             source="/api/organizations/<o>/members",
             error="organization is required",
         )
+    default_limit = ctx._policy_int("hf_org_members", "default_limit", 100)
     scan_cap = ctx._policy_int("hf_org_members", "scan_max", GRAPH_SCAN_LIMIT_CAP)
     limit_plan = ctx._resolve_exhaustive_limits(
+        limit=limit,
         count_only=count_only,
+        default_limit=default_limit,
+        max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
         scan_limit=scan_limit,
         scan_cap=scan_cap,
     )
+    applied_limit = int(limit_plan["applied_limit"])
     scan_lim = int(limit_plan["applied_scan_limit"])
     has_where = isinstance(where, dict) and bool(where)
     overview_total: int | None = None
         item = {
             "username": handle,
             "fullname": getattr(row, "fullname", None),
+            "is_pro": getattr(row, "is_pro", None),
             "role": getattr(row, "role", None),
         }
         normalized.append(item)
+    try:
+        normalized = ctx._apply_where(
+            normalized, where, allowed_fields=ACTOR_CANONICAL_FIELDS
+        )
+    except ValueError as exc:
+        return ctx._helper_error(
+            start_calls=start_calls,
+            source=endpoint,
+            error=exc,
+            organization=org,
+        )
     observed_total = len(rows)
     scan_exhaustive = observed_total < scan_lim
     overview_list_mismatch = (
         total = observed_total
         total_matched = observed_total
     total_available = overview_total if overview_total is not None else observed_total
+    items = normalized[:applied_limit]
     scan_limit_hit = not exact_count and observed_total >= scan_lim
     count_source = (
         "overview" if overview_total is not None and (not has_where) else "scan"
     )
     sample_complete = (
         exact_count
+        and len(normalized) <= applied_limit
         and (not count_only or len(normalized) == 0)
     )
     more_available = ctx._derive_more_available(
     )
     if not exact_count and scan_limit_hit:
         more_available = "unknown" if has_where else True
+    try:
+        items = ctx._project_actor_items(items, fields)
+    except ValueError as exc:
+        return ctx._helper_error(
+            start_calls=start_calls,
+            source=endpoint,
+            error=exc,
+            organization=org,
+        )
     meta = ctx._build_exhaustive_result_meta(
         base_meta={
             "scanned": observed_total,
     kind: str,
     username: str,
     pro_only: bool | None,
+    limit: int | None,
     scan_limit: int | None,
     count_only: bool,
     where: dict[str, Any] | None,
     helper_name: str,
 ) -> dict[str, Any]:
     start_calls = ctx.call_count["n"]
+    default_limit = ctx._policy_int(helper_name, "default_limit", 100)
     scan_cap = ctx._policy_int(helper_name, "scan_max", GRAPH_SCAN_LIMIT_CAP)
+    max_limit = ctx._policy_int(
+        helper_name, "max_limit", EXHAUSTIVE_HELPER_RETURN_HARD_CAP
     )
     u = str(username or "").strip()
     if not u:
             error="username is required",
         )
     limit_plan = ctx._resolve_exhaustive_limits(
+        limit=limit,
         count_only=count_only,
+        default_limit=default_limit,
+        max_limit=max_limit,
         scan_limit=scan_limit,
         scan_cap=scan_cap,
     )
+    applied_limit = int(limit_plan["applied_limit"])
     scan_lim = int(limit_plan["applied_scan_limit"])
     has_where = isinstance(where, dict) and bool(where)
     filtered = pro_only is not None or has_where
         item = {
             "username": handle,
             "fullname": getattr(row, "fullname", None),
+            "is_pro": getattr(row, "is_pro", None),
         }
+        if pro_only is True and item.get("is_pro") is not True:
             continue
+        if pro_only is False and item.get("is_pro") is True:
             continue
         normalized.append(item)
+    try:
+        normalized = ctx._apply_where(
+            normalized, where, allowed_fields=ACTOR_CANONICAL_FIELDS
+        )
+    except ValueError as exc:
+        return ctx._helper_error(
+            start_calls=start_calls,
+            source=endpoint,
+            error=exc,
+            relation=kind,
+            username=u,
+            entity=u,
+            entity_type=entity_type,
+            organization=u if entity_type == "organization" else None,
+        )
     observed_total = len(rows)
     scan_exhaustive = observed_total < scan_lim
     overview_list_mismatch = (
         total = observed_total
         total_matched = observed_total
     total_available = overview_total if overview_total is not None else observed_total
+    items = normalized[:applied_limit]
     scan_limit_hit = not exact_count and observed_total >= scan_lim
     count_source = (
         "overview" if overview_total is not None and (not filtered) else "scan"
     )
     sample_complete = (
         exact_count
+        and len(normalized) <= applied_limit
         and (not count_only or len(normalized) == 0)
     )
     more_available = ctx._derive_more_available(
     )
     if not exact_count and scan_limit_hit:
         more_available = "unknown" if filtered else True
+    try:
+        items = ctx._project_actor_items(items, fields)
+    except ValueError as exc:
+        return ctx._helper_error(
+            start_calls=start_calls,
+            source=endpoint,
+            error=exc,
+            relation=kind,
+            username=u,
+            entity=u,
+            entity_type=entity_type,
+            organization=u if entity_type == "organization" else None,
+        )
     meta = ctx._build_exhaustive_result_meta(
         base_meta={
             "scanned": observed_total,
             "display_name": overview_item.get("fullname")
             or str(overview_item.get("username") or resolved_handle),
             "bio": overview_item.get("bio"),
+            "avatar_url": overview_item.get("avatar_url"),
+            "website_url": overview_item.get("website_url"),
             "twitter_url": overview_item.get("twitter"),
             "github_url": overview_item.get("github"),
             "linkedin_url": overview_item.get("linkedin"),
             "papers_count": ctx._overview_count(overview_item, "papers"),
             "upvotes_count": ctx._overview_count(overview_item, "upvotes"),
             "organizations": overview_item.get("orgs"),
+            "is_pro": overview_item.get("is_pro"),
         }
         if "likes" in requested_sections:
             likes = await ctx.call_helper(
                 "hf_user_likes",
                 username=resolved_handle,
+                limit=likes_lim,
                 scan_limit=USER_SUMMARY_LIKES_SCAN_LIMIT,
                 count_only=likes_lim == 0,
                 sort="liked_at",
                 "hf_recent_activity",
                 feed_type="user",
                 entity=resolved_handle,
+                limit=activity_lim,
                 max_pages=USER_SUMMARY_ACTIVITY_MAX_PAGES,
                 count_only=activity_lim == 0,
                 fields=["timestamp", "event_type", "repo_type", "repo_id"],
         item = {
             "handle": str(overview_item.get("organization") or resolved_handle),
             "entity_type": "organization",
+            "display_name": overview_item.get("display_name")
             or str(overview_item.get("organization") or resolved_handle),
             "description": overview_item.get("description"),
+            "avatar_url": overview_item.get("avatar_url"),
+            "website_url": overview_item.get("website_url"),
             "followers_count": ctx._overview_count(overview_item, "followers"),
             "members_count": ctx._overview_count(overview_item, "members"),
             "models_count": ctx._overview_count(overview_item, "models"),
     ctx: HelperRuntimeContext,
     username: str | None = None,
     relation: str = "followers",
+    limit: int | None = None,
     scan_limit: int | None = None,
     count_only: bool = False,
     pro_only: bool | None = None,
         rel,
         resolved_username,
         pro_only,
+        limit,
         scan_limit,
         count_only,
         where,

monty_api/http_runtime.py CHANGED Viewed

@@ -9,11 +9,11 @@ from urllib.request import Request, urlopen
 from huggingface_hub import HfApi
-from .aliases import REPO_SORT_KEYS, SORT_KEY_ALIASES
 from .constants import (
     DEFAULT_TIMEOUT_SEC,
 )
-from .registry import REPO_API_ADAPTERS
 from .validation import _endpoint_allowed, _normalize_endpoint, _sanitize_params
@@ -78,10 +78,14 @@ def _normalize_repo_sort_key(
     if not raw:
         return None, None
-    key = SORT_KEY_ALIASES.get(raw.lower().replace(" ", "").replace("__", "_"))
-    if key is None:
-        key = SORT_KEY_ALIASES.get(raw.lower())
-    if key is None:
         return None, f"Invalid sort key '{raw}'"
     rt = _canonical_repo_type(repo_type)
@@ -111,6 +115,8 @@ def _repo_list_call(api: HfApi, repo_type: str, **kwargs: Any) -> list[Any]:
 def _repo_detail_call(api: HfApi, repo_type: str, repo_id: str) -> Any:
     adapter = _repo_api_adapter(repo_type)
     method = getattr(api, adapter.detail_method_name)
     return method(repo_id)
@@ -138,6 +144,43 @@ def _optional_str_list(value: Any) -> list[str] | None:
     return None
 def _extract_num_params(num_params: Any = None, safetensors: Any = None) -> int | None:
     direct = _as_int(num_params)
     if direct is not None:
@@ -149,6 +192,24 @@ def _extract_num_params(num_params: Any = None, safetensors: Any = None) -> int
     return _as_int(total)
 def _extract_author_names(value: Any) -> list[str] | None:
     if not isinstance(value, (list, tuple)):
         return None
@@ -242,6 +303,8 @@ def _build_repo_row(
     models: Any = None,
     datasets: Any = None,
     subdomain: Any = None,
 ) -> dict[str, Any]:
     rt = _canonical_repo_type(repo_type)
     author_value = author
@@ -252,6 +315,15 @@ def _build_repo_row(
     ):
         author_value = repo_id.split("/", 1)[0]
     return {
         "id": repo_id,
         "slug": repo_id,
@@ -279,6 +351,8 @@ def _build_repo_row(
         "models": _optional_str_list(models),
         "datasets": _optional_str_list(datasets),
         "subdomain": subdomain,
     }
@@ -292,9 +366,7 @@ def _normalize_repo_search_row(row: Any, repo_type: str) -> dict[str, Any]:
         created_at=getattr(row, "created_at", None),
         last_modified=getattr(row, "last_modified", None),
         pipeline_tag=getattr(row, "pipeline_tag", None),
-        num_params=_extract_num_params(
-            getattr(row, "num_params", None), getattr(row, "safetensors", None)
-        ),
         private=getattr(row, "private", None),
         trending_score=getattr(row, "trending_score", None),
         tags=getattr(row, "tags", None),
@@ -307,6 +379,7 @@ def _normalize_repo_search_row(row: Any, repo_type: str) -> dict[str, Any]:
         models=getattr(row, "models", None),
         datasets=getattr(row, "datasets", None),
         subdomain=getattr(row, "subdomain", None),
     )
@@ -325,11 +398,6 @@ def _normalize_repo_detail_row(
 def _normalize_trending_row(
     repo: dict[str, Any], default_repo_type: str, rank: int | None = None
 ) -> dict[str, Any]:
-    raw_num_params = (
-        repo.get("num_params")
-        if repo.get("num_params") is not None
-        else repo.get("numParameters")
-    )
     row = _build_repo_row(
         repo_id=repo.get("id"),
         repo_type=repo.get("type") or repo.get("repoType") or default_repo_type,
@@ -339,7 +407,7 @@ def _normalize_trending_row(
         created_at=repo.get("createdAt"),
         last_modified=repo.get("lastModified"),
         pipeline_tag=repo.get("pipeline_tag"),
-        num_params=_extract_num_params(raw_num_params, repo.get("safetensors")),
         private=repo.get("private"),
         trending_score=repo.get("trendingScore"),
         tags=repo.get("tags"),
@@ -352,6 +420,8 @@ def _normalize_trending_row(
         models=repo.get("models"),
         datasets=repo.get("datasets"),
         subdomain=repo.get("subdomain"),
     )
     if rank is not None:
         row["trending_rank"] = rank
@@ -419,7 +489,7 @@ def _normalize_collection_repo_item(row: dict[str, Any]) -> dict[str, Any] | Non
         created_at=row.get("createdAt") or row.get("created_at"),
         last_modified=row.get("lastModified") or row.get("last_modified"),
         pipeline_tag=row.get("pipeline_tag") or row.get("pipelineTag"),
-        num_params=_extract_num_params(row.get("num_params"), row.get("safetensors")),
         private=row.get("private"),
         tags=row.get("tags"),
         gated=row.get("gated"),
@@ -430,6 +500,8 @@ def _normalize_collection_repo_item(row: dict[str, Any]) -> dict[str, Any] | Non
         models=row.get("models"),
         datasets=row.get("datasets"),
         subdomain=row.get("subdomain"),
     )

 from huggingface_hub import HfApi
+from .aliases import REPO_SORT_KEYS
 from .constants import (
     DEFAULT_TIMEOUT_SEC,
 )
+from .registry import REPO_API_ADAPTERS, REPO_SEARCH_DEFAULT_EXPAND
 from .validation import _endpoint_allowed, _normalize_endpoint, _sanitize_params
     if not raw:
         return None, None
+    key = raw
+    if key not in {
+        "created_at",
+        "downloads",
+        "last_modified",
+        "likes",
+        "trending_score",
+    }:
         return None, f"Invalid sort key '{raw}'"
     rt = _canonical_repo_type(repo_type)
 def _repo_detail_call(api: HfApi, repo_type: str, repo_id: str) -> Any:
+    """Fetch the detail record for one repo via the adapter for ``repo_type``.
+
+    The adapter returned by ``_repo_api_adapter(repo_type)`` names the method
+    to call on ``api``; that method's return value is passed back unchanged.
+    """
     adapter = _repo_api_adapter(repo_type)
     method = getattr(api, adapter.detail_method_name)
+    # Spaces are the only repo type called with an explicit ``expand`` list,
+    # taken from REPO_SEARCH_DEFAULT_EXPAND["space"].
+    # NOTE(review): presumably this makes detail rows carry the same extra
+    # fields (e.g. runtime) as search rows - confirm against the registry.
+    if _canonical_repo_type(repo_type) == "space":
+        return method(repo_id, expand=list(REPO_SEARCH_DEFAULT_EXPAND["space"]))
     return method(repo_id)
     return None
+def _space_runtime_to_dict(value: Any) -> dict[str, Any] | None:
+    """Flatten a Space runtime payload into a compact dict.
+
+    ``value`` may be a mapping or any object exposing ``stage``, ``hardware``,
+    ``requested_hardware`` and ``sleep_time`` attributes.
+
+    For mappings:
+      * ``hardware`` may itself be a mapping, in which case its ``current`` and
+        ``requested`` entries are used; otherwise the raw ``hardware`` value is
+        kept and the requested hardware is read from ``requested_hardware`` or
+        ``requestedHardware``.
+      * the sleep time comes from ``gcTimeout`` when that is not None, else
+        from ``sleep_time`` or ``sleepTime``, and is coerced with ``_as_int``.
+
+    Returns a dict limited to the keys ``stage``, ``hardware``,
+    ``requested_hardware`` and ``sleep_time`` with None entries removed, or
+    None when ``value`` is None or every entry is None.
+    """
+    if value is None:
+        return None
+    if isinstance(value, dict):
+        hw = value.get("hardware")
+        if isinstance(hw, dict):
+            current_hw = hw.get("current")
+            requested_hw = hw.get("requested")
+        else:
+            current_hw = hw
+            requested_hw = value.get("requested_hardware") or value.get(
+                "requestedHardware"
+            )
+        raw_sleep = value.get("gcTimeout")
+        if raw_sleep is None:
+            raw_sleep = value.get("sleep_time") or value.get("sleepTime")
+        fields = {
+            "stage": value.get("stage"),
+            "hardware": current_hw,
+            "requested_hardware": requested_hw,
+            "sleep_time": _as_int(raw_sleep),
+        }
+    else:
+        fields = {
+            "stage": getattr(value, "stage", None),
+            "hardware": getattr(value, "hardware", None),
+            "requested_hardware": getattr(value, "requested_hardware", None),
+            "sleep_time": _as_int(getattr(value, "sleep_time", None)),
+        }
+    compact = {name: item for name, item in fields.items() if item is not None}
+    # An all-None payload collapses to None rather than an empty dict.
+    return compact or None
 def _extract_num_params(num_params: Any = None, safetensors: Any = None) -> int | None:
     direct = _as_int(num_params)
     if direct is not None:
     return _as_int(total)
+def _extract_num_params_from_object(row: Any) -> int | None:
+    """Resolve a parameter count from an attribute-style row.
+
+    The first non-None attribute among ``num_params``, ``numParameters`` and
+    ``num_parameters`` is handed to ``_extract_num_params`` together with the
+    row's ``safetensors`` attribute (None when absent).
+    """
+    candidate = None
+    for attr_name in ("num_params", "numParameters", "num_parameters"):
+        candidate = getattr(row, attr_name, None)
+        if candidate is not None:
+            break
+    return _extract_num_params(candidate, getattr(row, "safetensors", None))
+def _extract_num_params_from_dict(row: dict[str, Any]) -> int | None:
+    """Resolve a parameter count from a dict-style row.
+
+    The first non-None value among the keys ``num_params``, ``numParameters``
+    and ``num_parameters`` is handed to ``_extract_num_params`` together with
+    the row's ``safetensors`` entry.
+    """
+    candidate = None
+    for field_name in ("num_params", "numParameters", "num_parameters"):
+        candidate = row.get(field_name)
+        if candidate is not None:
+            break
+    return _extract_num_params(candidate, row.get("safetensors"))
 def _extract_author_names(value: Any) -> list[str] | None:
     if not isinstance(value, (list, tuple)):
         return None
     models: Any = None,
     datasets: Any = None,
     subdomain: Any = None,
+    runtime: Any = None,
+    runtime_stage: Any = None,
 ) -> dict[str, Any]:
     rt = _canonical_repo_type(repo_type)
     author_value = author
     ):
         author_value = repo_id.split("/", 1)[0]
+    runtime_payload = _space_runtime_to_dict(runtime)
+    resolved_runtime_stage = (
+        runtime_stage
+        if runtime_stage is not None
+        else runtime_payload.get("stage")
+        if isinstance(runtime_payload, dict)
+        else None
+    )
     return {
         "id": repo_id,
         "slug": repo_id,
         "models": _optional_str_list(models),
         "datasets": _optional_str_list(datasets),
         "subdomain": subdomain,
+        "runtime_stage": resolved_runtime_stage,
+        "runtime": runtime_payload,
     }
         created_at=getattr(row, "created_at", None),
         last_modified=getattr(row, "last_modified", None),
         pipeline_tag=getattr(row, "pipeline_tag", None),
+        num_params=_extract_num_params_from_object(row),
         private=getattr(row, "private", None),
         trending_score=getattr(row, "trending_score", None),
         tags=getattr(row, "tags", None),
         models=getattr(row, "models", None),
         datasets=getattr(row, "datasets", None),
         subdomain=getattr(row, "subdomain", None),
+        runtime=getattr(row, "runtime", None),
     )
 def _normalize_trending_row(
     repo: dict[str, Any], default_repo_type: str, rank: int | None = None
 ) -> dict[str, Any]:
     row = _build_repo_row(
         repo_id=repo.get("id"),
         repo_type=repo.get("type") or repo.get("repoType") or default_repo_type,
         created_at=repo.get("createdAt"),
         last_modified=repo.get("lastModified"),
         pipeline_tag=repo.get("pipeline_tag"),
+        num_params=_extract_num_params_from_dict(repo),
         private=repo.get("private"),
         trending_score=repo.get("trendingScore"),
         tags=repo.get("tags"),
         models=repo.get("models"),
         datasets=repo.get("datasets"),
         subdomain=repo.get("subdomain"),
+        runtime=repo.get("runtime"),
+        runtime_stage=repo.get("runtime_stage") or repo.get("runtimeStage"),
     )
     if rank is not None:
         row["trending_rank"] = rank
         created_at=row.get("createdAt") or row.get("created_at"),
         last_modified=row.get("lastModified") or row.get("last_modified"),
         pipeline_tag=row.get("pipeline_tag") or row.get("pipelineTag"),
+        num_params=_extract_num_params_from_dict(row),
         private=row.get("private"),
         tags=row.get("tags"),
         gated=row.get("gated"),
         models=row.get("models"),
         datasets=row.get("datasets"),
         subdomain=row.get("subdomain"),
+        runtime=row.get("runtime"),
+        runtime_stage=row.get("runtime_stage") or row.get("runtimeStage"),
     )

monty_api/query_entrypoints.py CHANGED Viewed

@@ -4,6 +4,8 @@ import argparse
 import asyncio
 import inspect
 import json
 import time
 from typing import Any, Callable
@@ -33,6 +35,25 @@ class MontyExecutionError(RuntimeError):
         self.trace = trace
 def _introspect_helper_signatures() -> dict[str, set[str]]:
     env = build_runtime_helper_environment(
         max_calls=DEFAULT_MAX_CALLS,
@@ -213,6 +234,12 @@ async def _execute_query(
             timeout_sec=timeout_sec,
         )
     )
     return await _run_with_monty(
         code=prepared_code,
         query=prepared_query,

 import asyncio
 import inspect
 import json
+import os
+import sys
 import time
 from typing import Any, Callable
         self.trace = trace
+def _query_debug_enabled() -> bool:
+    """Report whether the ``MONTY_DEBUG_QUERY`` environment flag is switched on.
+
+    The variable is trimmed and lower-cased; ``1``, ``true``, ``yes`` and
+    ``on`` enable debugging, anything else (including unset) disables it.
+    """
+    truthy = ("1", "true", "yes", "on")
+    flag = os.environ.get("MONTY_DEBUG_QUERY", "").strip().lower()
+    return flag in truthy
+def _log_generated_query(
+    *, query: str, code: str, max_calls: int | None, timeout_sec: int | None
+) -> None:
+    """Dump the query, its limits and the generated code to stderr.
+
+    Does nothing unless ``_query_debug_enabled()`` is true. Each section is
+    written with a ``[monty-debug]`` label and stderr is flushed at the end.
+    """
+    if _query_debug_enabled():
+        # One tuple per print call, in the order the sections are emitted.
+        records = (
+            ("[monty-debug] query:",),
+            (query,),
+            ("[monty-debug] max_calls:", max_calls),
+            ("[monty-debug] timeout_sec:", timeout_sec),
+            ("[monty-debug] code:",),
+            (code,),
+        )
+        for record in records:
+            print(*record, file=sys.stderr)
+        sys.stderr.flush()
 def _introspect_helper_signatures() -> dict[str, set[str]]:
     env = build_runtime_helper_environment(
         max_calls=DEFAULT_MAX_CALLS,
             timeout_sec=timeout_sec,
         )
     )
+    _log_generated_query(
+        query=prepared_query,
+        code=prepared_code,
+        max_calls=prepared_max_calls,
+        timeout_sec=prepared_timeout,
+    )
     return await _run_with_monty(
         code=prepared_code,
         query=prepared_query,

monty_api/runtime_context.py CHANGED Viewed

@@ -60,6 +60,8 @@ from .runtime_filtering import (
     _project_activity_items,
     _project_actor_items,
     _project_collection_items,
     _project_daily_paper_items,
     _project_items,
     _project_repo_items,
@@ -215,6 +217,8 @@ for name, value in {
     "_project_items": _project_items,
     "_project_repo_items": _project_repo_items,
     "_project_collection_items": _project_collection_items,
     "_project_daily_paper_items": _project_daily_paper_items,
     "_project_user_items": _project_user_items,
     "_project_actor_items": _project_actor_items,

     _project_activity_items,
     _project_actor_items,
     _project_collection_items,
+    _project_discussion_detail_items,
+    _project_discussion_items,
     _project_daily_paper_items,
     _project_items,
     _project_repo_items,
     "_project_items": _project_items,
     "_project_repo_items": _project_repo_items,
     "_project_collection_items": _project_collection_items,
+    "_project_discussion_items": _project_discussion_items,
+    "_project_discussion_detail_items": _project_discussion_detail_items,
     "_project_daily_paper_items": _project_daily_paper_items,
     "_project_user_items": _project_user_items,
     "_project_actor_items": _project_actor_items,

monty_api/runtime_envelopes.py CHANGED Viewed

@@ -21,8 +21,8 @@ def _helper_meta(
 def _derive_limit_metadata(
     self: Any,
     *,
-    requested_return_limit: int | None,
-    applied_return_limit: int,
     default_limit_used: bool,
     requested_scan_limit: int | None = None,
     applied_scan_limit: int | None = None,
@@ -30,8 +30,8 @@ def _derive_limit_metadata(
     applied_max_pages: int | None = None,
 ) -> dict[str, Any]:
     meta: dict[str, Any] = {
-        "requested_return_limit": requested_return_limit,
-        "applied_return_limit": applied_return_limit,
         "default_limit_used": default_limit_used,
     }
     if requested_scan_limit is not None or applied_scan_limit is not None:
@@ -42,8 +42,8 @@ def _derive_limit_metadata(
         meta["requested_max_pages"] = requested_max_pages
         meta["applied_max_pages"] = applied_max_pages
         meta["page_limit_applied"] = requested_max_pages != applied_max_pages
-    if requested_return_limit is not None:
-        meta["hard_cap_applied"] = applied_return_limit < requested_return_limit
     return meta
@@ -68,9 +68,9 @@ def _derive_truncated_by(
     hard_cap: bool = False,
     scan_limit_hit: bool = False,
     page_limit_hit: bool = False,
-    return_limit_hit: bool = False,
 ) -> str:
-    causes = [hard_cap, scan_limit_hit, page_limit_hit, return_limit_hit]
     if sum(1 for cause in causes if cause) > 1:
         return "multiple"
     if hard_cap:
@@ -79,8 +79,8 @@ def _derive_truncated_by(
         return "scan_limit"
     if page_limit_hit:
         return "page_limit"
-    if return_limit_hit:
-        return "return_limit"
     return "none"
@@ -89,7 +89,7 @@ def _derive_can_request_more(
 ) -> bool:
     if sample_complete:
         return False
-    return truncated_by in {"return_limit", "scan_limit", "page_limit", "multiple"}
 def _derive_next_request_hint(
@@ -97,12 +97,12 @@ def _derive_next_request_hint(
     *,
     truncated_by: str,
     more_available: bool | str,
-    applied_return_limit: int,
     applied_scan_limit: int | None = None,
     applied_max_pages: int | None = None,
 ) -> str:
-    if truncated_by == "return_limit":
-        return f"Ask for return_limit>{applied_return_limit} to see more rows"
     if truncated_by == "scan_limit" and applied_scan_limit is not None:
         return f"Increase scan_limit above {applied_scan_limit} for broader coverage"
     if truncated_by == "page_limit" and applied_max_pages is not None:
@@ -121,28 +121,27 @@ def _derive_next_request_hint(
 def _resolve_exhaustive_limits(
     self: Any,
     *,
-    return_limit: int | None,
     count_only: bool,
-    default_return: int,
-    max_return: int,
     scan_limit: int | None = None,
     scan_cap: int | None = None,
 ) -> dict[str, Any]:
-    requested_return_limit = None if count_only else return_limit
-    effective_requested_return_limit = 0 if count_only else requested_return_limit
     out: dict[str, Any] = {
-        "requested_return_limit": requested_return_limit,
-        "applied_return_limit": _clamp_int(
-            effective_requested_return_limit,
-            default=default_return,
             minimum=0,
-            maximum=max_return,
         ),
-        "default_limit_used": requested_return_limit is None and not count_only,
     }
     out["hard_cap_applied"] = (
-        requested_return_limit is not None
-        and out["applied_return_limit"] < requested_return_limit
     )
     if scan_cap is not None:
         out["requested_scan_limit"] = scan_limit
@@ -168,7 +167,7 @@ def _build_exhaustive_meta(
     applied_max_pages: int | None = None,
 ) -> dict[str, Any]:
     meta = dict(base_meta)
-    applied_return_limit = int(limit_plan["applied_return_limit"])
     applied_scan_limit = limit_plan.get("applied_scan_limit")
     meta.update(
         {
@@ -186,7 +185,7 @@ def _build_exhaustive_meta(
                 self,
                 truncated_by=truncated_by,
                 more_available=more_available,
-                applied_return_limit=applied_return_limit,
                 applied_scan_limit=applied_scan_limit
                 if isinstance(applied_scan_limit, int)
                 else None,
@@ -197,8 +196,8 @@ def _build_exhaustive_meta(
     meta.update(
         _derive_limit_metadata(
             self,
-            requested_return_limit=limit_plan["requested_return_limit"],
-            applied_return_limit=applied_return_limit,
             default_limit_used=bool(limit_plan["default_limit_used"]),
             requested_scan_limit=limit_plan.get("requested_scan_limit"),
             applied_scan_limit=applied_scan_limit
@@ -263,26 +262,26 @@ def _build_exhaustive_result_meta(
     requested_max_pages: int | None = None,
     applied_max_pages: int | None = None,
 ) -> dict[str, Any]:
-    applied_return_limit = int(limit_plan["applied_return_limit"])
     if count_only:
         effective_sample_complete = exact_count
     else:
         effective_sample_complete = (
             sample_complete
             if isinstance(sample_complete, bool)
-            else exact_count and matched_count <= applied_return_limit
         )
-    return_limit_hit = (
         False
         if count_only
-        else (applied_return_limit > 0 and matched_count > applied_return_limit)
     )
     truncated_by = _derive_truncated_by(
         self,
         hard_cap=bool(limit_plan.get("hard_cap_applied")),
         scan_limit_hit=scan_limit_hit,
         page_limit_hit=page_limit_hit,
-        return_limit_hit=return_limit_hit,
     )
     truncated = truncated_by != "none" or truncated_extra
     total_value = _as_int(base_meta.get("total"))

 def _derive_limit_metadata(
     self: Any,
     *,
+    requested_limit: int | None,
+    applied_limit: int,
     default_limit_used: bool,
     requested_scan_limit: int | None = None,
     applied_scan_limit: int | None = None,
     applied_max_pages: int | None = None,
 ) -> dict[str, Any]:
     meta: dict[str, Any] = {
+        "requested_limit": requested_limit,
+        "applied_limit": applied_limit,
         "default_limit_used": default_limit_used,
     }
     if requested_scan_limit is not None or applied_scan_limit is not None:
         meta["requested_max_pages"] = requested_max_pages
         meta["applied_max_pages"] = applied_max_pages
         meta["page_limit_applied"] = requested_max_pages != applied_max_pages
+    if requested_limit is not None:
+        meta["hard_cap_applied"] = applied_limit < requested_limit
     return meta
     hard_cap: bool = False,
     scan_limit_hit: bool = False,
     page_limit_hit: bool = False,
+    limit_hit: bool = False,
 ) -> str:
+    causes = [hard_cap, scan_limit_hit, page_limit_hit, limit_hit]
     if sum(1 for cause in causes if cause) > 1:
         return "multiple"
     if hard_cap:
         return "scan_limit"
     if page_limit_hit:
         return "page_limit"
+    if limit_hit:
+        return "limit"
     return "none"
 ) -> bool:
     if sample_complete:
         return False
+    return truncated_by in {"limit", "scan_limit", "page_limit", "multiple"}
 def _derive_next_request_hint(
     *,
     truncated_by: str,
     more_available: bool | str,
+    applied_limit: int,
     applied_scan_limit: int | None = None,
     applied_max_pages: int | None = None,
 ) -> str:
+    if truncated_by == "limit":
+        return f"Ask for limit>{applied_limit} to see more rows"
     if truncated_by == "scan_limit" and applied_scan_limit is not None:
         return f"Increase scan_limit above {applied_scan_limit} for broader coverage"
     if truncated_by == "page_limit" and applied_max_pages is not None:
 def _resolve_exhaustive_limits(
     self: Any,
     *,
+    limit: int | None,
     count_only: bool,
+    default_limit: int,
+    max_limit: int,
     scan_limit: int | None = None,
     scan_cap: int | None = None,
 ) -> dict[str, Any]:
+    requested_limit = None if count_only else limit
+    effective_requested_limit = 0 if count_only else requested_limit
     out: dict[str, Any] = {
+        "requested_limit": requested_limit,
+        "applied_limit": _clamp_int(
+            effective_requested_limit,
+            default=default_limit,
             minimum=0,
+            maximum=max_limit,
         ),
+        "default_limit_used": requested_limit is None and not count_only,
     }
     out["hard_cap_applied"] = (
+        requested_limit is not None and out["applied_limit"] < requested_limit
     )
     if scan_cap is not None:
         out["requested_scan_limit"] = scan_limit
     applied_max_pages: int | None = None,
 ) -> dict[str, Any]:
     meta = dict(base_meta)
+    applied_limit = int(limit_plan["applied_limit"])
     applied_scan_limit = limit_plan.get("applied_scan_limit")
     meta.update(
         {
                 self,
                 truncated_by=truncated_by,
                 more_available=more_available,
+                applied_limit=applied_limit,
                 applied_scan_limit=applied_scan_limit
                 if isinstance(applied_scan_limit, int)
                 else None,
     meta.update(
         _derive_limit_metadata(
             self,
+            requested_limit=limit_plan["requested_limit"],
+            applied_limit=applied_limit,
             default_limit_used=bool(limit_plan["default_limit_used"]),
             requested_scan_limit=limit_plan.get("requested_scan_limit"),
             applied_scan_limit=applied_scan_limit
     requested_max_pages: int | None = None,
     applied_max_pages: int | None = None,
 ) -> dict[str, Any]:
+    applied_limit = int(limit_plan["applied_limit"])
     if count_only:
         effective_sample_complete = exact_count
     else:
         effective_sample_complete = (
             sample_complete
             if isinstance(sample_complete, bool)
+            else exact_count and matched_count <= applied_limit
         )
+    limit_hit = (
         False
         if count_only
+        else (applied_limit > 0 and matched_count > applied_limit)
     )
     truncated_by = _derive_truncated_by(
         self,
         hard_cap=bool(limit_plan.get("hard_cap_applied")),
         scan_limit_hit=scan_limit_hit,
         page_limit_hit=page_limit_hit,
+        limit_hit=limit_hit,
     )
     truncated = truncated_by != "none" or truncated_extra
     total_value = _as_int(base_meta.get("total"))

monty_api/runtime_filtering.py CHANGED Viewed

@@ -2,40 +2,48 @@ from __future__ import annotations
 from typing import Any
-from .aliases import (
-    ACTIVITY_FIELD_ALIASES,
-    ACTOR_FIELD_ALIASES,
-    COLLECTION_FIELD_ALIASES,
-    DAILY_PAPER_FIELD_ALIASES,
-    REPO_FIELD_ALIASES,
-    USER_FIELD_ALIASES,
-    USER_LIKES_FIELD_ALIASES,
 )
 from .http_runtime import _as_int
 def _project_items(
     self: Any,
     items: list[dict[str, Any]],
     fields: list[str] | None,
-    aliases: dict[str, str] | None = None,
 ) -> list[dict[str, Any]]:
     if not isinstance(fields, list) or not fields:
         return items
     wanted = [str(field).strip() for field in fields if str(field).strip()]
     if not wanted:
         return items
-    alias_map = {
-        str(key).strip().lower(): str(value).strip()
-        for key, value in (aliases or {}).items()
-        if str(key).strip() and str(value).strip()
-    }
     projected: list[dict[str, Any]] = []
     for row in items:
         out: dict[str, Any] = {}
         for key in wanted:
-            source_key = alias_map.get(key.lower(), key)
-            value = row.get(source_key)
             if value is None:
                 continue
             out[key] = value
@@ -46,63 +54,88 @@ def _project_items(
 def _project_repo_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
-    return _project_items(self, items, fields, aliases=REPO_FIELD_ALIASES)
 def _project_collection_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
-    return _project_items(self, items, fields, aliases=COLLECTION_FIELD_ALIASES)
 def _project_daily_paper_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
-    return _project_items(self, items, fields, aliases=DAILY_PAPER_FIELD_ALIASES)
 def _project_user_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
-    return _project_items(self, items, fields, aliases=USER_FIELD_ALIASES)
 def _project_actor_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
-    return _project_items(self, items, fields, aliases=ACTOR_FIELD_ALIASES)
 def _project_user_like_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
-    return _project_items(self, items, fields, aliases=USER_LIKES_FIELD_ALIASES)
 def _project_activity_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
-    return _project_items(self, items, fields, aliases=ACTIVITY_FIELD_ALIASES)
 def _normalize_where(
     self: Any,
     where: dict[str, Any] | None,
-    aliases: dict[str, str] | None = None,
 ) -> dict[str, Any] | None:
     if not isinstance(where, dict) or not where:
         return where
-    alias_map = {
-        str(key).strip().lower(): str(value).strip()
-        for key, value in (aliases or {}).items()
-        if str(key).strip() and str(value).strip()
-    }
     normalized: dict[str, Any] = {}
     for key, value in where.items():
         raw_key = str(key).strip()
         if not raw_key:
             continue
-        normalized[alias_map.get(raw_key.lower(), raw_key)] = value
     return normalized
@@ -161,9 +194,9 @@ def _apply_where(
     items: list[dict[str, Any]],
     where: dict[str, Any] | None,
     *,
-    aliases: dict[str, str] | None = None,
 ) -> list[dict[str, Any]]:
-    normalized_where = _normalize_where(self, where, aliases=aliases)
     if not isinstance(normalized_where, dict) or not normalized_where:
         return items
     return [row for row in items if _item_matches_where(self, row, normalized_where)]

 from typing import Any
+from .constants import (
+    ACTIVITY_CANONICAL_FIELDS,
+    ACTOR_CANONICAL_FIELDS,
+    COLLECTION_CANONICAL_FIELDS,
+    DAILY_PAPER_CANONICAL_FIELDS,
+    DISCUSSION_CANONICAL_FIELDS,
+    DISCUSSION_DETAIL_CANONICAL_FIELDS,
+    REPO_CANONICAL_FIELDS,
+    USER_CANONICAL_FIELDS,
+    USER_LIKES_CANONICAL_FIELDS,
 )
 from .http_runtime import _as_int
+def _allowed_field_set(allowed_fields: tuple[str, ...] | list[str] | set[str]) -> set[str]:
+    """Build the set of usable field names from ``allowed_fields``.
+
+    Every entry is stringified and stripped of surrounding whitespace; entries
+    that end up empty are dropped.
+    """
+    trimmed = (str(field).strip() for field in allowed_fields)
+    return {name for name in trimmed if name}
 def _project_items(
     self: Any,
     items: list[dict[str, Any]],
     fields: list[str] | None,
+    *,
+    allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
 ) -> list[dict[str, Any]]:
     if not isinstance(fields, list) or not fields:
         return items
     wanted = [str(field).strip() for field in fields if str(field).strip()]
     if not wanted:
         return items
+    if allowed_fields is not None:
+        allowed = _allowed_field_set(allowed_fields)
+        invalid = sorted(field for field in wanted if field not in allowed)
+        if invalid:
+            raise ValueError(
+                f"Unsupported fields {invalid}. Allowed fields: {sorted(allowed)}"
+            )
     projected: list[dict[str, Any]] = []
     for row in items:
         out: dict[str, Any] = {}
         for key in wanted:
+            value = row.get(key)
             if value is None:
                 continue
             out[key] = value
 def _project_repo_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
+    """Project repo rows onto ``fields`` via ``_project_items``.
+
+    Selectable names are restricted to ``REPO_CANONICAL_FIELDS``.
+    """
+    return _project_items(self, items, fields, allowed_fields=REPO_CANONICAL_FIELDS)
 def _project_collection_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
+    """Project collection rows onto ``fields`` via ``_project_items``.
+
+    Selectable names are restricted to ``COLLECTION_CANONICAL_FIELDS``.
+    """
+    return _project_items(
+        self, items, fields, allowed_fields=COLLECTION_CANONICAL_FIELDS
+    )
 def _project_daily_paper_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
+    """Project daily-paper rows onto ``fields`` via ``_project_items``.
+
+    Selectable names are restricted to ``DAILY_PAPER_CANONICAL_FIELDS``.
+    """
+    return _project_items(
+        self, items, fields, allowed_fields=DAILY_PAPER_CANONICAL_FIELDS
+    )
 def _project_user_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
+    """Project user rows onto ``fields`` via ``_project_items``.
+
+    Selectable names are restricted to ``USER_CANONICAL_FIELDS``.
+    """
+    return _project_items(self, items, fields, allowed_fields=USER_CANONICAL_FIELDS)
 def _project_actor_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
+    """Project actor rows onto ``fields`` via ``_project_items``.
+
+    Selectable names are restricted to ``ACTOR_CANONICAL_FIELDS``.
+    """
+    return _project_items(self, items, fields, allowed_fields=ACTOR_CANONICAL_FIELDS)
 def _project_user_like_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
+    """Project user-like rows onto ``fields`` via ``_project_items``.
+
+    Selectable names are restricted to ``USER_LIKES_CANONICAL_FIELDS``.
+    """
+    return _project_items(
+        self, items, fields, allowed_fields=USER_LIKES_CANONICAL_FIELDS
+    )
 def _project_activity_items(
     self: Any, items: list[dict[str, Any]], fields: list[str] | None
 ) -> list[dict[str, Any]]:
+    """Project activity rows onto ``fields`` via ``_project_items``.
+
+    Selectable names are restricted to ``ACTIVITY_CANONICAL_FIELDS``.
+    """
+    return _project_items(
+        self, items, fields, allowed_fields=ACTIVITY_CANONICAL_FIELDS
+    )
+def _project_discussion_items(
+    self: Any, items: list[dict[str, Any]], fields: list[str] | None
+) -> list[dict[str, Any]]:
+    """Project discussion rows onto ``fields`` via ``_project_items``.
+
+    Selectable names are restricted to ``DISCUSSION_CANONICAL_FIELDS``.
+    """
+    return _project_items(
+        self, items, fields, allowed_fields=DISCUSSION_CANONICAL_FIELDS
+    )
+def _project_discussion_detail_items(
+    self: Any, items: list[dict[str, Any]], fields: list[str] | None
+) -> list[dict[str, Any]]:
+    """Project discussion-detail rows onto ``fields`` via ``_project_items``.
+
+    Selectable names are restricted to ``DISCUSSION_DETAIL_CANONICAL_FIELDS``.
+    """
+    return _project_items(
+        self, items, fields, allowed_fields=DISCUSSION_DETAIL_CANONICAL_FIELDS
+    )
 def _normalize_where(
     self: Any,
     where: dict[str, Any] | None,
+    *,
+    allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
 ) -> dict[str, Any] | None:
+    """Return ``where`` with stripped string keys, validating them if asked.
+
+    Args:
+        where: Filter mapping. Anything that is not a non-empty dict is
+            returned unchanged.
+        allowed_fields: Optional whitelist of filter field names. When given,
+            every non-blank key must be a member after stripping.
+
+    Returns:
+        A new dict keyed by the stripped key names; keys that are blank after
+        stripping are dropped.
+
+    Raises:
+        ValueError: If ``allowed_fields`` is given and one or more keys are not
+            in it. All offending keys are reported together, sorted, as
+            ``_project_items`` does for projection fields.
+    """
     if not isinstance(where, dict) or not where:
         return where
+    allowed = _allowed_field_set(allowed_fields) if allowed_fields is not None else None
     normalized: dict[str, Any] = {}
+    invalid: set[str] = set()
     for key, value in where.items():
         raw_key = str(key).strip()
         if not raw_key:
             continue
+        if allowed is not None and raw_key not in allowed:
+            # Collect instead of raising at once so the caller sees every
+            # unsupported filter field in a single error.
+            invalid.add(raw_key)
+            continue
+        normalized[raw_key] = value
+    if invalid:
+        raise ValueError(
+            f"Unsupported filter fields {sorted(invalid)}. Allowed fields: {sorted(allowed or ())}"
+        )
     return normalized
     items: list[dict[str, Any]],
     where: dict[str, Any] | None,
     *,
+    allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
 ) -> list[dict[str, Any]]:
+    normalized_where = _normalize_where(self, where, allowed_fields=allowed_fields)
     if not isinstance(normalized_where, dict) or not normalized_where:
         return items
     return [row for row in items if _item_matches_where(self, row, normalized_where)]

monty_api/tool_entrypoints.py CHANGED Viewed

@@ -5,6 +5,7 @@ from __future__ import annotations
 import sys
 from pathlib import Path
 _PACKAGE_DIR = Path(__file__).resolve().parent
 _ROOT_DIR = _PACKAGE_DIR.parent
@@ -13,7 +14,40 @@ for candidate in (_ROOT_DIR, _PACKAGE_DIR):
     if candidate_str not in sys.path:
         sys.path.insert(0, candidate_str)
-from monty_api import HELPER_EXTERNALS, hf_hub_query, hf_hub_query_raw, main  # noqa: E402
 __all__ = [
     "HELPER_EXTERNALS",

 import sys
 from pathlib import Path
+from typing import Any
 _PACKAGE_DIR = Path(__file__).resolve().parent
 _ROOT_DIR = _PACKAGE_DIR.parent
     if candidate_str not in sys.path:
         sys.path.insert(0, candidate_str)
+from monty_api import (  # noqa: E402
+    HELPER_EXTERNALS,
+    hf_hub_query as _hf_hub_query,
+    hf_hub_query_raw as _hf_hub_query_raw,
+    main,
+)
+async def hf_hub_query(
+    query: str,
+    code: str,
+    max_calls: int | None = None,
+    timeout_sec: int | None = None,
+) -> dict[str, Any]:
     # Explicitly-typed wrapper around the `hf_hub_query` imported from
     # `monty_api` (bound locally as `_hf_hub_query`). All four arguments are
     # forwarded by keyword and the awaited result is returned unchanged.
     # NOTE(review): presumably spelled out as a local coroutine (instead of
     # re-exporting the import) so a tool loader can introspect a concrete
     # signature -- confirm the motivation with the commit author.
+    return await _hf_hub_query(
+        query=query,
+        code=code,
+        max_calls=max_calls,
+        timeout_sec=timeout_sec,
+    )
+async def hf_hub_query_raw(
+    query: str,
+    code: str,
+    max_calls: int | None = None,
+    timeout_sec: int | None = None,
+) -> Any:
     # Explicitly-typed wrapper around the `hf_hub_query_raw` imported from
     # `monty_api` (bound locally as `_hf_hub_query_raw`). All four arguments
     # are forwarded by keyword and the awaited result is returned unchanged.
     # The return annotation is `Any`, unlike `hf_hub_query`, which is
     # annotated as returning a dict.
+    return await _hf_hub_query_raw(
+        query=query,
+        code=code,
+        max_calls=max_calls,
+        timeout_sec=timeout_sec,
+    )
 __all__ = [
     "HELPER_EXTERNALS",

monty_api/validation.py CHANGED Viewed

@@ -155,8 +155,8 @@ def _summarize_limit_hit(helper_name: str, result: Any) -> dict[str, Any] | None
         "truncated": meta.get("truncated"),
         "truncated_by": meta.get("truncated_by"),
         "more_available": meta.get("more_available"),
-        "requested_return_limit": meta.get("requested_return_limit"),
-        "applied_return_limit": meta.get("applied_return_limit"),
         "next_request_hint": meta.get("next_request_hint"),
     }
     if meta.get("scan_limit") is not None:

         "truncated": meta.get("truncated"),
         "truncated_by": meta.get("truncated_by"),
         "more_available": meta.get("more_available"),
+        "requested_limit": meta.get("requested_limit"),
+        "applied_limit": meta.get("applied_limit"),
         "next_request_hint": meta.get("next_request_hint"),
     }
     if meta.get("scan_limit") is not None:

tool_entrypoints.py CHANGED Viewed

@@ -5,6 +5,7 @@ from __future__ import annotations
 import sys
 from pathlib import Path
 _PACKAGE_DIR = Path(__file__).resolve().parent
 _ROOT_DIR = _PACKAGE_DIR.parent
@@ -13,7 +14,40 @@ for candidate in (_ROOT_DIR, _PACKAGE_DIR):
     if candidate_str not in sys.path:
         sys.path.insert(0, candidate_str)
-from monty_api import HELPER_EXTERNALS, hf_hub_query, hf_hub_query_raw, main  # noqa: E402
 __all__ = [
     "HELPER_EXTERNALS",

 import sys
 from pathlib import Path
+from typing import Any
 _PACKAGE_DIR = Path(__file__).resolve().parent
 _ROOT_DIR = _PACKAGE_DIR.parent
     if candidate_str not in sys.path:
         sys.path.insert(0, candidate_str)
+from monty_api import (  # noqa: E402
+    HELPER_EXTERNALS,
+    hf_hub_query as _hf_hub_query,
+    hf_hub_query_raw as _hf_hub_query_raw,
+    main,
+)
+async def hf_hub_query(
+    query: str,
+    code: str,
+    max_calls: int | None = None,
+    timeout_sec: int | None = None,
+) -> dict[str, Any]:
     # Root-level copy of the entry point: forwards all four arguments by
     # keyword to the `hf_hub_query` imported from `monty_api` (bound locally
     # as `_hf_hub_query`) and returns the awaited result unchanged.
     # NOTE(review): this hunk is line-for-line the same as the one shown for
     # monty_api/tool_entrypoints.py -- keep the two files in sync.
+    return await _hf_hub_query(
+        query=query,
+        code=code,
+        max_calls=max_calls,
+        timeout_sec=timeout_sec,
+    )
+async def hf_hub_query_raw(
+    query: str,
+    code: str,
+    max_calls: int | None = None,
+    timeout_sec: int | None = None,
+) -> Any:
     # Root-level copy of the raw entry point: forwards all four arguments by
     # keyword to the `hf_hub_query_raw` imported from `monty_api` (bound
     # locally as `_hf_hub_query_raw`) and returns the awaited result
     # unchanged.
     # NOTE(review): this hunk is line-for-line the same as the one shown for
     # monty_api/tool_entrypoints.py -- keep the two files in sync.
+    return await _hf_hub_query_raw(
+        query=query,
+        code=code,
+        max_calls=max_calls,
+        timeout_sec=timeout_sec,
+    )
 __all__ = [
     "HELPER_EXTERNALS",