evalstate HF Staff committed on
Commit
cdf6171
·
verified ·
1 Parent(s): f687774

Deploy committed Monty runtime refactor

Browse files
hf-hub-query.md CHANGED
@@ -4,7 +4,7 @@ name: hf_hub_query
4
  model: hf.openai/gpt-oss-120b:sambanova
5
  use_history: false
6
  default: true
7
- description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound return_limit, scan_limit, and max_pages for brevity or broader coverage, and the tool can also be asked about its supported helpers, fields, aliases, defaults, and coverage behavior."
8
  shell: false
9
  skills: []
10
  function_tools:
@@ -32,7 +32,7 @@ The user must never see your generated Python unless they explicitly ask for deb
32
  - The return value of `solve(...)` is the user-facing payload.
33
  - Return a dict/list when JSON is appropriate; return a string/number/bool only when that scalar is the intended payload.
34
  - For composed structured outputs that include your own coverage metadata, always use the exact top-level keys `results` and `coverage` unless the user explicitly asked for different key names.
35
- - Do **not** rename `results` to `likes`, `liked_models`, `items`, `rows`, or similar in those composed outputs.
36
  - Runtime will wrap the `solve(...)` return value under `result` and attach runtime information under `meta`.
37
  - When helper-owned coverage metadata matters, prefer returning the helper envelope directly.
38
  - Do **not** create your own transport wrapper such as `{result: ..., meta: ...}` inside `solve(...)`.
 
4
  model: hf.openai/gpt-oss-120b:sambanova
5
  use_history: false
6
  default: true
7
+ description: "Read-only Hugging Face Hub navigator for discovery, lookup, filtering, ranking, counts, field-constrained extraction, and relationship questions across users, orgs, models, datasets, spaces, collections, discussions, daily papers, recent activity, followers/following, likes, and likers. Good for structured raw outputs and compact results. Generated helper calls can explicitly bound limit, scan_limit, and max_pages for brevity or broader coverage, and the tool can also be asked about its supported helpers, canonical fields, defaults, and coverage behavior."
8
  shell: false
9
  skills: []
10
  function_tools:
 
32
  - The return value of `solve(...)` is the user-facing payload.
33
  - Return a dict/list when JSON is appropriate; return a string/number/bool only when that scalar is the intended payload.
34
  - For composed structured outputs that include your own coverage metadata, always use the exact top-level keys `results` and `coverage` unless the user explicitly asked for different key names.
35
+ - Prefer returning outputs directly unless post-processing is required. Do **NOT** rename fields unless asked specifically.
36
  - Runtime will wrap the `solve(...)` return value under `result` and attach runtime information under `meta`.
37
  - When helper-owned coverage metadata matters, prefer returning the helper envelope directly.
38
  - Do **not** create your own transport wrapper such as `{result: ..., meta: ...}` inside `solve(...)`.
monty_api/aliases.py CHANGED
@@ -29,91 +29,3 @@ REPO_SORT_KEYS: dict[str, set[str]] = {
29
  "trending_score",
30
  },
31
  }
32
-
33
- # Alias policy:
34
- # - canonical names stay canonical
35
- # - support a small compatibility set for observed prompt/output variants
36
- # - do not add speculative synonyms unless they appear in prompts, evals, or
37
- # upstream payloads we already normalize
38
- SORT_KEY_ALIASES: dict[str, str] = {
39
- "createdat": "created_at",
40
- "created_at": "created_at",
41
- "created-at": "created_at",
42
- "downloads": "downloads",
43
- "likes": "likes",
44
- "lastmodified": "last_modified",
45
- "last_modified": "last_modified",
46
- "last-modified": "last_modified",
47
- "trendingscore": "trending_score",
48
- "trending_score": "trending_score",
49
- "trending-score": "trending_score",
50
- "trending": "trending_score",
51
- }
52
-
53
- USER_FIELD_ALIASES: dict[str, str] = {
54
- "login": "username",
55
- "user": "username",
56
- "handle": "username",
57
- "name": "fullname",
58
- "full_name": "fullname",
59
- "is_pro": "isPro",
60
- "pro": "isPro",
61
- }
62
-
63
- ACTOR_FIELD_ALIASES: dict[str, str] = {
64
- **USER_FIELD_ALIASES,
65
- "entity_type": "type",
66
- "user_type": "type",
67
- }
68
-
69
- REPO_FIELD_ALIASES: dict[str, str] = {
70
- "repoid": "repo_id",
71
- "repotype": "repo_type",
72
- "repourl": "repo_url",
73
- "createdat": "created_at",
74
- "lastmodified": "last_modified",
75
- "pipelinetag": "pipeline_tag",
76
- "numparams": "num_params",
77
- "trendingrank": "trending_rank",
78
- "trendingscore": "trending_score",
79
- "libraryname": "library_name",
80
- "paperswithcodeid": "paperswithcode_id",
81
- }
82
-
83
- COLLECTION_FIELD_ALIASES: dict[str, str] = {
84
- "collectionid": "collection_id",
85
- "lastupdated": "last_updated",
86
- "ownertype": "owner_type",
87
- "itemcount": "item_count",
88
- "author": "owner",
89
- }
90
-
91
- DAILY_PAPER_FIELD_ALIASES: dict[str, str] = {
92
- "paperid": "paper_id",
93
- "publishedat": "published_at",
94
- "submittedondailyat": "submitted_on_daily_at",
95
- "submittedby": "submitted_by",
96
- "discussionid": "discussion_id",
97
- "githubrepo": "github_repo_url",
98
- "githubstars": "github_stars",
99
- "projectpage": "project_page_url",
100
- "numcomments": "num_comments",
101
- "isauthorparticipating": "is_author_participating",
102
- "repoid": "repo_id",
103
- }
104
-
105
- USER_LIKES_FIELD_ALIASES: dict[str, str] = {
106
- "likedat": "liked_at",
107
- "repoid": "repo_id",
108
- "repotype": "repo_type",
109
- "repoauthor": "repo_author",
110
- "repolikes": "repo_likes",
111
- "repodownloads": "repo_downloads",
112
- }
113
-
114
- ACTIVITY_FIELD_ALIASES: dict[str, str] = {
115
- "time": "timestamp",
116
- "type": "event_type",
117
- "repoid": "repo_id",
118
- "repotype": "repo_type",
119
- }
 
29
  "trending_score",
30
  },
31
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
monty_api/helpers/activity.py CHANGED
@@ -4,8 +4,8 @@ from __future__ import annotations
4
  from functools import partial
5
  from typing import Any, Callable
6
 
7
- from ..aliases import ACTIVITY_FIELD_ALIASES
8
  from ..constants import (
 
9
  EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
10
  RECENT_ACTIVITY_PAGE_SIZE,
11
  RECENT_ACTIVITY_SCAN_MAX_PAGES,
@@ -19,7 +19,7 @@ async def hf_recent_activity(
19
  entity: str | None = None,
20
  activity_types: list[str] | None = None,
21
  repo_types: list[str] | None = None,
22
- return_limit: int | None = None,
23
  max_pages: int | None = None,
24
  start_cursor: str | None = None,
25
  count_only: bool = False,
@@ -27,7 +27,7 @@ async def hf_recent_activity(
27
  fields: list[str] | None = None,
28
  ) -> dict[str, Any]:
29
  start_calls = ctx.call_count["n"]
30
- default_return = ctx._policy_int("hf_recent_activity", "default_return", 100)
31
  page_cap = ctx._policy_int(
32
  "hf_recent_activity", "page_limit", RECENT_ACTIVITY_PAGE_SIZE
33
  )
@@ -56,12 +56,12 @@ async def hf_recent_activity(
56
  error="entity is required",
57
  )
58
  limit_plan = ctx._resolve_exhaustive_limits(
59
- return_limit=return_limit,
60
  count_only=count_only,
61
- default_return=default_return,
62
- max_return=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
63
  )
64
- ret_lim = int(limit_plan["applied_return_limit"])
65
  page_lim = page_cap
66
  pages_lim = ctx._clamp_int(
67
  requested_max_pages, default=pages_cap, minimum=1, maximum=pages_cap
@@ -85,8 +85,17 @@ async def hf_recent_activity(
85
  pages = 0
86
  exhausted_feed = False
87
  stopped_for_budget = False
88
- normalized_where = ctx._normalize_where(where, aliases=ACTIVITY_FIELD_ALIASES)
89
- while pages < pages_lim and (ret_lim == 0 or len(items) < ret_lim):
 
 
 
 
 
 
 
 
 
90
  if ctx._budget_remaining() <= 0:
91
  stopped_for_budget = True
92
  break
@@ -147,15 +156,22 @@ async def hf_recent_activity(
147
  if not ctx._item_matches_where(item, normalized_where):
148
  continue
149
  matched += 1
150
- if len(items) < ret_lim:
151
  items.append(item)
152
  if not next_cursor:
153
  exhausted_feed = True
154
  break
155
- items = ctx._project_activity_items(items, fields)
 
 
 
 
 
 
 
156
  exact_count = exhausted_feed and (not stopped_for_budget)
157
  sample_complete = (
158
- exact_count and ret_lim >= matched and (not count_only or matched == 0)
159
  )
160
  page_limit_hit = (
161
  next_cursor is not None and pages >= pages_lim and (not exhausted_feed)
 
4
  from functools import partial
5
  from typing import Any, Callable
6
 
 
7
  from ..constants import (
8
+ ACTIVITY_CANONICAL_FIELDS,
9
  EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
10
  RECENT_ACTIVITY_PAGE_SIZE,
11
  RECENT_ACTIVITY_SCAN_MAX_PAGES,
 
19
  entity: str | None = None,
20
  activity_types: list[str] | None = None,
21
  repo_types: list[str] | None = None,
22
+ limit: int | None = None,
23
  max_pages: int | None = None,
24
  start_cursor: str | None = None,
25
  count_only: bool = False,
 
27
  fields: list[str] | None = None,
28
  ) -> dict[str, Any]:
29
  start_calls = ctx.call_count["n"]
30
+ default_limit = ctx._policy_int("hf_recent_activity", "default_limit", 100)
31
  page_cap = ctx._policy_int(
32
  "hf_recent_activity", "page_limit", RECENT_ACTIVITY_PAGE_SIZE
33
  )
 
56
  error="entity is required",
57
  )
58
  limit_plan = ctx._resolve_exhaustive_limits(
59
+ limit=limit,
60
  count_only=count_only,
61
+ default_limit=default_limit,
62
+ max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
63
  )
64
+ applied_limit = int(limit_plan["applied_limit"])
65
  page_lim = page_cap
66
  pages_lim = ctx._clamp_int(
67
  requested_max_pages, default=pages_cap, minimum=1, maximum=pages_cap
 
85
  pages = 0
86
  exhausted_feed = False
87
  stopped_for_budget = False
88
+ try:
89
+ normalized_where = ctx._normalize_where(
90
+ where, allowed_fields=ACTIVITY_CANONICAL_FIELDS
91
+ )
92
+ except ValueError as exc:
93
+ return ctx._helper_error(
94
+ start_calls=start_calls,
95
+ source="/api/recent-activity",
96
+ error=exc,
97
+ )
98
+ while pages < pages_lim and (applied_limit == 0 or len(items) < applied_limit):
99
  if ctx._budget_remaining() <= 0:
100
  stopped_for_budget = True
101
  break
 
156
  if not ctx._item_matches_where(item, normalized_where):
157
  continue
158
  matched += 1
159
+ if len(items) < applied_limit:
160
  items.append(item)
161
  if not next_cursor:
162
  exhausted_feed = True
163
  break
164
+ try:
165
+ items = ctx._project_activity_items(items, fields)
166
+ except ValueError as exc:
167
+ return ctx._helper_error(
168
+ start_calls=start_calls,
169
+ source="/api/recent-activity",
170
+ error=exc,
171
+ )
172
  exact_count = exhausted_feed and (not stopped_for_budget)
173
  sample_complete = (
174
+ exact_count and applied_limit >= matched and (not count_only or matched == 0)
175
  )
176
  page_limit_hit = (
177
  next_cursor is not None and pages >= pages_lim and (not exhausted_feed)
monty_api/helpers/profiles.py CHANGED
@@ -5,11 +5,8 @@ from itertools import islice
5
  import re
6
  from typing import Any, Callable
7
  from ..context_types import HelperRuntimeContext
8
- from ..aliases import (
9
- ACTOR_FIELD_ALIASES,
10
- USER_FIELD_ALIASES,
11
- )
12
  from ..constants import (
 
13
  EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
14
  GRAPH_SCAN_LIMIT_CAP,
15
  OUTPUT_ITEMS_TRUNCATION_LIMIT,
@@ -74,7 +71,7 @@ async def hf_whoami(ctx: HelperRuntimeContext) -> dict[str, Any]:
74
  item = {
75
  "username": username,
76
  "fullname": payload.get("fullname"),
77
- "isPro": payload.get("isPro"),
78
  }
79
  items = [item] if isinstance(username, str) and username else []
80
  return ctx._helper_success(
@@ -148,16 +145,16 @@ async def _hf_user_overview(ctx: HelperRuntimeContext, username: str) -> dict[st
148
  "username": obj.username or u,
149
  "fullname": obj.fullname,
150
  "bio": getattr(obj, "details", None),
151
- "avatarUrl": obj.avatar_url,
152
- "websiteUrl": getattr(obj, "websiteUrl", None),
153
  "twitter": _social_url("twitter", twitter_handle),
154
  "github": _social_url("github", github_handle),
155
  "linkedin": _social_url("linkedin", linkedin_handle),
156
  "bluesky": _social_url("bluesky", bluesky_handle),
157
- "twitterHandle": twitter_handle,
158
- "githubHandle": github_handle,
159
- "linkedinHandle": linkedin_handle,
160
- "blueskyHandle": bluesky_handle,
161
  "followers": ctx._as_int(obj.num_followers),
162
  "following": ctx._as_int(obj.num_following),
163
  "likes": ctx._as_int(obj.num_likes),
@@ -168,7 +165,7 @@ async def _hf_user_overview(ctx: HelperRuntimeContext, username: str) -> dict[st
168
  "papers": ctx._as_int(getattr(obj, "num_papers", None)),
169
  "upvotes": ctx._as_int(getattr(obj, "num_upvotes", None)),
170
  "orgs": org_names,
171
- "isPro": obj.is_pro,
172
  }
173
  return ctx._helper_success(
174
  start_calls=start_calls,
@@ -202,10 +199,10 @@ async def _hf_org_overview(
202
  return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
203
  item = {
204
  "organization": obj.name or org,
205
- "displayName": obj.fullname,
206
- "avatarUrl": obj.avatar_url,
207
  "description": obj.details,
208
- "websiteUrl": getattr(obj, "websiteUrl", None),
209
  "followers": ctx._as_int(obj.num_followers),
210
  "members": ctx._as_int(obj.num_users),
211
  "models": ctx._as_int(getattr(obj, "num_models", None)),
@@ -226,7 +223,7 @@ async def _hf_org_overview(
226
  async def hf_org_members(
227
  ctx: HelperRuntimeContext,
228
  organization: str,
229
- return_limit: int | None = None,
230
  scan_limit: int | None = None,
231
  count_only: bool = False,
232
  where: dict[str, Any] | None = None,
@@ -240,17 +237,17 @@ async def hf_org_members(
240
  source="/api/organizations/<o>/members",
241
  error="organization is required",
242
  )
243
- default_return = ctx._policy_int("hf_org_members", "default_return", 100)
244
  scan_cap = ctx._policy_int("hf_org_members", "scan_max", GRAPH_SCAN_LIMIT_CAP)
245
  limit_plan = ctx._resolve_exhaustive_limits(
246
- return_limit=return_limit,
247
  count_only=count_only,
248
- default_return=default_return,
249
- max_return=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
250
  scan_limit=scan_limit,
251
  scan_cap=scan_cap,
252
  )
253
- ret_lim = int(limit_plan["applied_return_limit"])
254
  scan_lim = int(limit_plan["applied_scan_limit"])
255
  has_where = isinstance(where, dict) and bool(where)
256
  overview_total: int | None = None
@@ -299,11 +296,21 @@ async def hf_org_members(
299
  item = {
300
  "username": handle,
301
  "fullname": getattr(row, "fullname", None),
302
- "isPro": getattr(row, "is_pro", None),
303
  "role": getattr(row, "role", None),
304
  }
305
  normalized.append(item)
306
- normalized = ctx._apply_where(normalized, where, aliases=ACTOR_FIELD_ALIASES)
 
 
 
 
 
 
 
 
 
 
307
  observed_total = len(rows)
308
  scan_exhaustive = observed_total < scan_lim
309
  overview_list_mismatch = (
@@ -324,14 +331,14 @@ async def hf_org_members(
324
  total = observed_total
325
  total_matched = observed_total
326
  total_available = overview_total if overview_total is not None else observed_total
327
- items = normalized[:ret_lim]
328
  scan_limit_hit = not exact_count and observed_total >= scan_lim
329
  count_source = (
330
  "overview" if overview_total is not None and (not has_where) else "scan"
331
  )
332
  sample_complete = (
333
  exact_count
334
- and len(normalized) <= ret_lim
335
  and (not count_only or len(normalized) == 0)
336
  )
337
  more_available = ctx._derive_more_available(
@@ -342,7 +349,15 @@ async def hf_org_members(
342
  )
343
  if not exact_count and scan_limit_hit:
344
  more_available = "unknown" if has_where else True
345
- items = ctx._project_user_items(items, fields)
 
 
 
 
 
 
 
 
346
  meta = ctx._build_exhaustive_result_meta(
347
  base_meta={
348
  "scanned": observed_total,
@@ -375,7 +390,7 @@ async def _user_graph_helper(
375
  kind: str,
376
  username: str,
377
  pro_only: bool | None,
378
- return_limit: int | None,
379
  scan_limit: int | None,
380
  count_only: bool,
381
  where: dict[str, Any] | None,
@@ -384,10 +399,10 @@ async def _user_graph_helper(
384
  helper_name: str,
385
  ) -> dict[str, Any]:
386
  start_calls = ctx.call_count["n"]
387
- default_return = ctx._policy_int(helper_name, "default_return", 100)
388
  scan_cap = ctx._policy_int(helper_name, "scan_max", GRAPH_SCAN_LIMIT_CAP)
389
- max_return = ctx._policy_int(
390
- helper_name, "max_return", EXHAUSTIVE_HELPER_RETURN_HARD_CAP
391
  )
392
  u = str(username or "").strip()
393
  if not u:
@@ -397,14 +412,14 @@ async def _user_graph_helper(
397
  error="username is required",
398
  )
399
  limit_plan = ctx._resolve_exhaustive_limits(
400
- return_limit=return_limit,
401
  count_only=count_only,
402
- default_return=default_return,
403
- max_return=max_return,
404
  scan_limit=scan_limit,
405
  scan_cap=scan_cap,
406
  )
407
- ret_lim = int(limit_plan["applied_return_limit"])
408
  scan_lim = int(limit_plan["applied_scan_limit"])
409
  has_where = isinstance(where, dict) and bool(where)
410
  filtered = pro_only is not None or has_where
@@ -509,14 +524,28 @@ async def _user_graph_helper(
509
  item = {
510
  "username": handle,
511
  "fullname": getattr(row, "fullname", None),
512
- "isPro": getattr(row, "is_pro", None),
513
  }
514
- if pro_only is True and item.get("isPro") is not True:
515
  continue
516
- if pro_only is False and item.get("isPro") is True:
517
  continue
518
  normalized.append(item)
519
- normalized = ctx._apply_where(normalized, where, aliases=USER_FIELD_ALIASES)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520
  observed_total = len(rows)
521
  scan_exhaustive = observed_total < scan_lim
522
  overview_list_mismatch = (
@@ -537,14 +566,14 @@ async def _user_graph_helper(
537
  total = observed_total
538
  total_matched = observed_total
539
  total_available = overview_total if overview_total is not None else observed_total
540
- items = normalized[:ret_lim]
541
  scan_limit_hit = not exact_count and observed_total >= scan_lim
542
  count_source = (
543
  "overview" if overview_total is not None and (not filtered) else "scan"
544
  )
545
  sample_complete = (
546
  exact_count
547
- and len(normalized) <= ret_lim
548
  and (not count_only or len(normalized) == 0)
549
  )
550
  more_available = ctx._derive_more_available(
@@ -555,7 +584,19 @@ async def _user_graph_helper(
555
  )
556
  if not exact_count and scan_limit_hit:
557
  more_available = "unknown" if filtered else True
558
- items = ctx._project_user_items(items, fields)
 
 
 
 
 
 
 
 
 
 
 
 
559
  meta = ctx._build_exhaustive_result_meta(
560
  base_meta={
561
  "scanned": observed_total,
@@ -645,8 +686,8 @@ async def hf_profile_summary(
645
  "display_name": overview_item.get("fullname")
646
  or str(overview_item.get("username") or resolved_handle),
647
  "bio": overview_item.get("bio"),
648
- "avatar_url": overview_item.get("avatarUrl"),
649
- "website_url": overview_item.get("websiteUrl"),
650
  "twitter_url": overview_item.get("twitter"),
651
  "github_url": overview_item.get("github"),
652
  "linkedin_url": overview_item.get("linkedin"),
@@ -661,13 +702,13 @@ async def hf_profile_summary(
661
  "papers_count": ctx._overview_count(overview_item, "papers"),
662
  "upvotes_count": ctx._overview_count(overview_item, "upvotes"),
663
  "organizations": overview_item.get("orgs"),
664
- "is_pro": overview_item.get("isPro"),
665
  }
666
  if "likes" in requested_sections:
667
  likes = await ctx.call_helper(
668
  "hf_user_likes",
669
  username=resolved_handle,
670
- return_limit=likes_lim,
671
  scan_limit=USER_SUMMARY_LIKES_SCAN_LIMIT,
672
  count_only=likes_lim == 0,
673
  sort="liked_at",
@@ -689,7 +730,7 @@ async def hf_profile_summary(
689
  "hf_recent_activity",
690
  feed_type="user",
691
  entity=resolved_handle,
692
- return_limit=activity_lim,
693
  max_pages=USER_SUMMARY_ACTIVITY_MAX_PAGES,
694
  count_only=activity_lim == 0,
695
  fields=["timestamp", "event_type", "repo_type", "repo_id"],
@@ -724,11 +765,11 @@ async def hf_profile_summary(
724
  item = {
725
  "handle": str(overview_item.get("organization") or resolved_handle),
726
  "entity_type": "organization",
727
- "display_name": overview_item.get("displayName")
728
  or str(overview_item.get("organization") or resolved_handle),
729
  "description": overview_item.get("description"),
730
- "avatar_url": overview_item.get("avatarUrl"),
731
- "website_url": overview_item.get("websiteUrl"),
732
  "followers_count": ctx._overview_count(overview_item, "followers"),
733
  "members_count": ctx._overview_count(overview_item, "members"),
734
  "models_count": ctx._overview_count(overview_item, "models"),
@@ -765,7 +806,7 @@ async def hf_user_graph(
765
  ctx: HelperRuntimeContext,
766
  username: str | None = None,
767
  relation: str = "followers",
768
- return_limit: int | None = None,
769
  scan_limit: int | None = None,
770
  count_only: bool = False,
771
  pro_only: bool | None = None,
@@ -800,7 +841,7 @@ async def hf_user_graph(
800
  rel,
801
  resolved_username,
802
  pro_only,
803
- return_limit,
804
  scan_limit,
805
  count_only,
806
  where,
 
5
  import re
6
  from typing import Any, Callable
7
  from ..context_types import HelperRuntimeContext
 
 
 
 
8
  from ..constants import (
9
+ ACTOR_CANONICAL_FIELDS,
10
  EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
11
  GRAPH_SCAN_LIMIT_CAP,
12
  OUTPUT_ITEMS_TRUNCATION_LIMIT,
 
71
  item = {
72
  "username": username,
73
  "fullname": payload.get("fullname"),
74
+ "is_pro": payload.get("isPro"),
75
  }
76
  items = [item] if isinstance(username, str) and username else []
77
  return ctx._helper_success(
 
145
  "username": obj.username or u,
146
  "fullname": obj.fullname,
147
  "bio": getattr(obj, "details", None),
148
+ "avatar_url": obj.avatar_url,
149
+ "website_url": getattr(obj, "websiteUrl", None),
150
  "twitter": _social_url("twitter", twitter_handle),
151
  "github": _social_url("github", github_handle),
152
  "linkedin": _social_url("linkedin", linkedin_handle),
153
  "bluesky": _social_url("bluesky", bluesky_handle),
154
+ "twitter_handle": twitter_handle,
155
+ "github_handle": github_handle,
156
+ "linkedin_handle": linkedin_handle,
157
+ "bluesky_handle": bluesky_handle,
158
  "followers": ctx._as_int(obj.num_followers),
159
  "following": ctx._as_int(obj.num_following),
160
  "likes": ctx._as_int(obj.num_likes),
 
165
  "papers": ctx._as_int(getattr(obj, "num_papers", None)),
166
  "upvotes": ctx._as_int(getattr(obj, "num_upvotes", None)),
167
  "orgs": org_names,
168
+ "is_pro": obj.is_pro,
169
  }
170
  return ctx._helper_success(
171
  start_calls=start_calls,
 
199
  return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
200
  item = {
201
  "organization": obj.name or org,
202
+ "display_name": obj.fullname,
203
+ "avatar_url": obj.avatar_url,
204
  "description": obj.details,
205
+ "website_url": getattr(obj, "websiteUrl", None),
206
  "followers": ctx._as_int(obj.num_followers),
207
  "members": ctx._as_int(obj.num_users),
208
  "models": ctx._as_int(getattr(obj, "num_models", None)),
 
223
  async def hf_org_members(
224
  ctx: HelperRuntimeContext,
225
  organization: str,
226
+ limit: int | None = None,
227
  scan_limit: int | None = None,
228
  count_only: bool = False,
229
  where: dict[str, Any] | None = None,
 
237
  source="/api/organizations/<o>/members",
238
  error="organization is required",
239
  )
240
+ default_limit = ctx._policy_int("hf_org_members", "default_limit", 100)
241
  scan_cap = ctx._policy_int("hf_org_members", "scan_max", GRAPH_SCAN_LIMIT_CAP)
242
  limit_plan = ctx._resolve_exhaustive_limits(
243
+ limit=limit,
244
  count_only=count_only,
245
+ default_limit=default_limit,
246
+ max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
247
  scan_limit=scan_limit,
248
  scan_cap=scan_cap,
249
  )
250
+ applied_limit = int(limit_plan["applied_limit"])
251
  scan_lim = int(limit_plan["applied_scan_limit"])
252
  has_where = isinstance(where, dict) and bool(where)
253
  overview_total: int | None = None
 
296
  item = {
297
  "username": handle,
298
  "fullname": getattr(row, "fullname", None),
299
+ "is_pro": getattr(row, "is_pro", None),
300
  "role": getattr(row, "role", None),
301
  }
302
  normalized.append(item)
303
+ try:
304
+ normalized = ctx._apply_where(
305
+ normalized, where, allowed_fields=ACTOR_CANONICAL_FIELDS
306
+ )
307
+ except ValueError as exc:
308
+ return ctx._helper_error(
309
+ start_calls=start_calls,
310
+ source=endpoint,
311
+ error=exc,
312
+ organization=org,
313
+ )
314
  observed_total = len(rows)
315
  scan_exhaustive = observed_total < scan_lim
316
  overview_list_mismatch = (
 
331
  total = observed_total
332
  total_matched = observed_total
333
  total_available = overview_total if overview_total is not None else observed_total
334
+ items = normalized[:applied_limit]
335
  scan_limit_hit = not exact_count and observed_total >= scan_lim
336
  count_source = (
337
  "overview" if overview_total is not None and (not has_where) else "scan"
338
  )
339
  sample_complete = (
340
  exact_count
341
+ and len(normalized) <= applied_limit
342
  and (not count_only or len(normalized) == 0)
343
  )
344
  more_available = ctx._derive_more_available(
 
349
  )
350
  if not exact_count and scan_limit_hit:
351
  more_available = "unknown" if has_where else True
352
+ try:
353
+ items = ctx._project_actor_items(items, fields)
354
+ except ValueError as exc:
355
+ return ctx._helper_error(
356
+ start_calls=start_calls,
357
+ source=endpoint,
358
+ error=exc,
359
+ organization=org,
360
+ )
361
  meta = ctx._build_exhaustive_result_meta(
362
  base_meta={
363
  "scanned": observed_total,
 
390
  kind: str,
391
  username: str,
392
  pro_only: bool | None,
393
+ limit: int | None,
394
  scan_limit: int | None,
395
  count_only: bool,
396
  where: dict[str, Any] | None,
 
399
  helper_name: str,
400
  ) -> dict[str, Any]:
401
  start_calls = ctx.call_count["n"]
402
+ default_limit = ctx._policy_int(helper_name, "default_limit", 100)
403
  scan_cap = ctx._policy_int(helper_name, "scan_max", GRAPH_SCAN_LIMIT_CAP)
404
+ max_limit = ctx._policy_int(
405
+ helper_name, "max_limit", EXHAUSTIVE_HELPER_RETURN_HARD_CAP
406
  )
407
  u = str(username or "").strip()
408
  if not u:
 
412
  error="username is required",
413
  )
414
  limit_plan = ctx._resolve_exhaustive_limits(
415
+ limit=limit,
416
  count_only=count_only,
417
+ default_limit=default_limit,
418
+ max_limit=max_limit,
419
  scan_limit=scan_limit,
420
  scan_cap=scan_cap,
421
  )
422
+ applied_limit = int(limit_plan["applied_limit"])
423
  scan_lim = int(limit_plan["applied_scan_limit"])
424
  has_where = isinstance(where, dict) and bool(where)
425
  filtered = pro_only is not None or has_where
 
524
  item = {
525
  "username": handle,
526
  "fullname": getattr(row, "fullname", None),
527
+ "is_pro": getattr(row, "is_pro", None),
528
  }
529
+ if pro_only is True and item.get("is_pro") is not True:
530
  continue
531
+ if pro_only is False and item.get("is_pro") is True:
532
  continue
533
  normalized.append(item)
534
+ try:
535
+ normalized = ctx._apply_where(
536
+ normalized, where, allowed_fields=ACTOR_CANONICAL_FIELDS
537
+ )
538
+ except ValueError as exc:
539
+ return ctx._helper_error(
540
+ start_calls=start_calls,
541
+ source=endpoint,
542
+ error=exc,
543
+ relation=kind,
544
+ username=u,
545
+ entity=u,
546
+ entity_type=entity_type,
547
+ organization=u if entity_type == "organization" else None,
548
+ )
549
  observed_total = len(rows)
550
  scan_exhaustive = observed_total < scan_lim
551
  overview_list_mismatch = (
 
566
  total = observed_total
567
  total_matched = observed_total
568
  total_available = overview_total if overview_total is not None else observed_total
569
+ items = normalized[:applied_limit]
570
  scan_limit_hit = not exact_count and observed_total >= scan_lim
571
  count_source = (
572
  "overview" if overview_total is not None and (not filtered) else "scan"
573
  )
574
  sample_complete = (
575
  exact_count
576
+ and len(normalized) <= applied_limit
577
  and (not count_only or len(normalized) == 0)
578
  )
579
  more_available = ctx._derive_more_available(
 
584
  )
585
  if not exact_count and scan_limit_hit:
586
  more_available = "unknown" if filtered else True
587
+ try:
588
+ items = ctx._project_actor_items(items, fields)
589
+ except ValueError as exc:
590
+ return ctx._helper_error(
591
+ start_calls=start_calls,
592
+ source=endpoint,
593
+ error=exc,
594
+ relation=kind,
595
+ username=u,
596
+ entity=u,
597
+ entity_type=entity_type,
598
+ organization=u if entity_type == "organization" else None,
599
+ )
600
  meta = ctx._build_exhaustive_result_meta(
601
  base_meta={
602
  "scanned": observed_total,
 
686
  "display_name": overview_item.get("fullname")
687
  or str(overview_item.get("username") or resolved_handle),
688
  "bio": overview_item.get("bio"),
689
+ "avatar_url": overview_item.get("avatar_url"),
690
+ "website_url": overview_item.get("website_url"),
691
  "twitter_url": overview_item.get("twitter"),
692
  "github_url": overview_item.get("github"),
693
  "linkedin_url": overview_item.get("linkedin"),
 
702
  "papers_count": ctx._overview_count(overview_item, "papers"),
703
  "upvotes_count": ctx._overview_count(overview_item, "upvotes"),
704
  "organizations": overview_item.get("orgs"),
705
+ "is_pro": overview_item.get("is_pro"),
706
  }
707
  if "likes" in requested_sections:
708
  likes = await ctx.call_helper(
709
  "hf_user_likes",
710
  username=resolved_handle,
711
+ limit=likes_lim,
712
  scan_limit=USER_SUMMARY_LIKES_SCAN_LIMIT,
713
  count_only=likes_lim == 0,
714
  sort="liked_at",
 
730
  "hf_recent_activity",
731
  feed_type="user",
732
  entity=resolved_handle,
733
+ limit=activity_lim,
734
  max_pages=USER_SUMMARY_ACTIVITY_MAX_PAGES,
735
  count_only=activity_lim == 0,
736
  fields=["timestamp", "event_type", "repo_type", "repo_id"],
 
765
  item = {
766
  "handle": str(overview_item.get("organization") or resolved_handle),
767
  "entity_type": "organization",
768
+ "display_name": overview_item.get("display_name")
769
  or str(overview_item.get("organization") or resolved_handle),
770
  "description": overview_item.get("description"),
771
+ "avatar_url": overview_item.get("avatar_url"),
772
+ "website_url": overview_item.get("website_url"),
773
  "followers_count": ctx._overview_count(overview_item, "followers"),
774
  "members_count": ctx._overview_count(overview_item, "members"),
775
  "models_count": ctx._overview_count(overview_item, "models"),
 
806
  ctx: HelperRuntimeContext,
807
  username: str | None = None,
808
  relation: str = "followers",
809
+ limit: int | None = None,
810
  scan_limit: int | None = None,
811
  count_only: bool = False,
812
  pro_only: bool | None = None,
 
841
  rel,
842
  resolved_username,
843
  pro_only,
844
+ limit,
845
  scan_limit,
846
  count_only,
847
  where,
monty_api/http_runtime.py CHANGED
@@ -9,11 +9,11 @@ from urllib.request import Request, urlopen
9
 
10
  from huggingface_hub import HfApi
11
 
12
- from .aliases import REPO_SORT_KEYS, SORT_KEY_ALIASES
13
  from .constants import (
14
  DEFAULT_TIMEOUT_SEC,
15
  )
16
- from .registry import REPO_API_ADAPTERS
17
  from .validation import _endpoint_allowed, _normalize_endpoint, _sanitize_params
18
 
19
 
@@ -78,10 +78,14 @@ def _normalize_repo_sort_key(
78
  if not raw:
79
  return None, None
80
 
81
- key = SORT_KEY_ALIASES.get(raw.lower().replace(" ", "").replace("__", "_"))
82
- if key is None:
83
- key = SORT_KEY_ALIASES.get(raw.lower())
84
- if key is None:
 
 
 
 
85
  return None, f"Invalid sort key '{raw}'"
86
 
87
  rt = _canonical_repo_type(repo_type)
@@ -111,6 +115,8 @@ def _repo_list_call(api: HfApi, repo_type: str, **kwargs: Any) -> list[Any]:
111
  def _repo_detail_call(api: HfApi, repo_type: str, repo_id: str) -> Any:
112
  adapter = _repo_api_adapter(repo_type)
113
  method = getattr(api, adapter.detail_method_name)
 
 
114
  return method(repo_id)
115
 
116
 
@@ -138,6 +144,43 @@ def _optional_str_list(value: Any) -> list[str] | None:
138
  return None
139
 
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  def _extract_num_params(num_params: Any = None, safetensors: Any = None) -> int | None:
142
  direct = _as_int(num_params)
143
  if direct is not None:
@@ -149,6 +192,24 @@ def _extract_num_params(num_params: Any = None, safetensors: Any = None) -> int
149
  return _as_int(total)
150
 
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  def _extract_author_names(value: Any) -> list[str] | None:
153
  if not isinstance(value, (list, tuple)):
154
  return None
@@ -242,6 +303,8 @@ def _build_repo_row(
242
  models: Any = None,
243
  datasets: Any = None,
244
  subdomain: Any = None,
 
 
245
  ) -> dict[str, Any]:
246
  rt = _canonical_repo_type(repo_type)
247
  author_value = author
@@ -252,6 +315,15 @@ def _build_repo_row(
252
  ):
253
  author_value = repo_id.split("/", 1)[0]
254
 
 
 
 
 
 
 
 
 
 
255
  return {
256
  "id": repo_id,
257
  "slug": repo_id,
@@ -279,6 +351,8 @@ def _build_repo_row(
279
  "models": _optional_str_list(models),
280
  "datasets": _optional_str_list(datasets),
281
  "subdomain": subdomain,
 
 
282
  }
283
 
284
 
@@ -292,9 +366,7 @@ def _normalize_repo_search_row(row: Any, repo_type: str) -> dict[str, Any]:
292
  created_at=getattr(row, "created_at", None),
293
  last_modified=getattr(row, "last_modified", None),
294
  pipeline_tag=getattr(row, "pipeline_tag", None),
295
- num_params=_extract_num_params(
296
- getattr(row, "num_params", None), getattr(row, "safetensors", None)
297
- ),
298
  private=getattr(row, "private", None),
299
  trending_score=getattr(row, "trending_score", None),
300
  tags=getattr(row, "tags", None),
@@ -307,6 +379,7 @@ def _normalize_repo_search_row(row: Any, repo_type: str) -> dict[str, Any]:
307
  models=getattr(row, "models", None),
308
  datasets=getattr(row, "datasets", None),
309
  subdomain=getattr(row, "subdomain", None),
 
310
  )
311
 
312
 
@@ -325,11 +398,6 @@ def _normalize_repo_detail_row(
325
  def _normalize_trending_row(
326
  repo: dict[str, Any], default_repo_type: str, rank: int | None = None
327
  ) -> dict[str, Any]:
328
- raw_num_params = (
329
- repo.get("num_params")
330
- if repo.get("num_params") is not None
331
- else repo.get("numParameters")
332
- )
333
  row = _build_repo_row(
334
  repo_id=repo.get("id"),
335
  repo_type=repo.get("type") or repo.get("repoType") or default_repo_type,
@@ -339,7 +407,7 @@ def _normalize_trending_row(
339
  created_at=repo.get("createdAt"),
340
  last_modified=repo.get("lastModified"),
341
  pipeline_tag=repo.get("pipeline_tag"),
342
- num_params=_extract_num_params(raw_num_params, repo.get("safetensors")),
343
  private=repo.get("private"),
344
  trending_score=repo.get("trendingScore"),
345
  tags=repo.get("tags"),
@@ -352,6 +420,8 @@ def _normalize_trending_row(
352
  models=repo.get("models"),
353
  datasets=repo.get("datasets"),
354
  subdomain=repo.get("subdomain"),
 
 
355
  )
356
  if rank is not None:
357
  row["trending_rank"] = rank
@@ -419,7 +489,7 @@ def _normalize_collection_repo_item(row: dict[str, Any]) -> dict[str, Any] | Non
419
  created_at=row.get("createdAt") or row.get("created_at"),
420
  last_modified=row.get("lastModified") or row.get("last_modified"),
421
  pipeline_tag=row.get("pipeline_tag") or row.get("pipelineTag"),
422
- num_params=_extract_num_params(row.get("num_params"), row.get("safetensors")),
423
  private=row.get("private"),
424
  tags=row.get("tags"),
425
  gated=row.get("gated"),
@@ -430,6 +500,8 @@ def _normalize_collection_repo_item(row: dict[str, Any]) -> dict[str, Any] | Non
430
  models=row.get("models"),
431
  datasets=row.get("datasets"),
432
  subdomain=row.get("subdomain"),
 
 
433
  )
434
 
435
 
 
9
 
10
  from huggingface_hub import HfApi
11
 
12
+ from .aliases import REPO_SORT_KEYS
13
  from .constants import (
14
  DEFAULT_TIMEOUT_SEC,
15
  )
16
+ from .registry import REPO_API_ADAPTERS, REPO_SEARCH_DEFAULT_EXPAND
17
  from .validation import _endpoint_allowed, _normalize_endpoint, _sanitize_params
18
 
19
 
 
78
  if not raw:
79
  return None, None
80
 
81
+ key = raw
82
+ if key not in {
83
+ "created_at",
84
+ "downloads",
85
+ "last_modified",
86
+ "likes",
87
+ "trending_score",
88
+ }:
89
  return None, f"Invalid sort key '{raw}'"
90
 
91
  rt = _canonical_repo_type(repo_type)
 
115
  def _repo_detail_call(api: HfApi, repo_type: str, repo_id: str) -> Any:
116
  adapter = _repo_api_adapter(repo_type)
117
  method = getattr(api, adapter.detail_method_name)
118
+ if _canonical_repo_type(repo_type) == "space":
119
+ return method(repo_id, expand=list(REPO_SEARCH_DEFAULT_EXPAND["space"]))
120
  return method(repo_id)
121
 
122
 
 
144
  return None
145
 
146
 
147
+ def _space_runtime_to_dict(value: Any) -> dict[str, Any] | None:
148
+ if value is None:
149
+ return None
150
+
151
+ if isinstance(value, dict):
152
+ raw = value
153
+ hardware = raw.get("hardware")
154
+ current_hardware = (
155
+ hardware.get("current") if isinstance(hardware, dict) else hardware
156
+ )
157
+ requested_hardware = (
158
+ hardware.get("requested")
159
+ if isinstance(hardware, dict)
160
+ else raw.get("requested_hardware") or raw.get("requestedHardware")
161
+ )
162
+ sleep_time = _as_int(
163
+ raw.get("gcTimeout")
164
+ if raw.get("gcTimeout") is not None
165
+ else raw.get("sleep_time") or raw.get("sleepTime")
166
+ )
167
+ out = {
168
+ "stage": raw.get("stage"),
169
+ "hardware": current_hardware,
170
+ "requested_hardware": requested_hardware,
171
+ "sleep_time": sleep_time,
172
+ }
173
+ return {key: val for key, val in out.items() if val is not None} or None
174
+
175
+ out = {
176
+ "stage": getattr(value, "stage", None),
177
+ "hardware": getattr(value, "hardware", None),
178
+ "requested_hardware": getattr(value, "requested_hardware", None),
179
+ "sleep_time": _as_int(getattr(value, "sleep_time", None)),
180
+ }
181
+ return {key: val for key, val in out.items() if val is not None} or None
182
+
183
+
184
  def _extract_num_params(num_params: Any = None, safetensors: Any = None) -> int | None:
185
  direct = _as_int(num_params)
186
  if direct is not None:
 
192
  return _as_int(total)
193
 
194
 
195
def _extract_num_params_from_object(row: Any) -> int | None:
    """Resolve a parameter count from an attribute-style row.

    Tries the attributes ``num_params``, ``numParameters`` and
    ``num_parameters`` in that order, stopping at the first that is not None,
    then hands that value plus the row's ``safetensors`` attribute to
    ``_extract_num_params``.
    """
    candidate = None
    for attr_name in ("num_params", "numParameters", "num_parameters"):
        candidate = getattr(row, attr_name, None)
        if candidate is not None:
            break
    return _extract_num_params(candidate, getattr(row, "safetensors", None))
202
+
203
+
204
def _extract_num_params_from_dict(row: dict[str, Any]) -> int | None:
    """Resolve a parameter count from a dict-style row.

    Tries the keys ``num_params``, ``numParameters`` and ``num_parameters`` in
    that order, stopping at the first whose value is not None, then hands that
    value plus ``row["safetensors"]`` (if any) to ``_extract_num_params``.
    """
    candidate = None
    for key in ("num_params", "numParameters", "num_parameters"):
        candidate = row.get(key)
        if candidate is not None:
            break
    return _extract_num_params(candidate, row.get("safetensors"))
211
+
212
+
213
  def _extract_author_names(value: Any) -> list[str] | None:
214
  if not isinstance(value, (list, tuple)):
215
  return None
 
303
  models: Any = None,
304
  datasets: Any = None,
305
  subdomain: Any = None,
306
+ runtime: Any = None,
307
+ runtime_stage: Any = None,
308
  ) -> dict[str, Any]:
309
  rt = _canonical_repo_type(repo_type)
310
  author_value = author
 
315
  ):
316
  author_value = repo_id.split("/", 1)[0]
317
 
318
+ runtime_payload = _space_runtime_to_dict(runtime)
319
+ resolved_runtime_stage = (
320
+ runtime_stage
321
+ if runtime_stage is not None
322
+ else runtime_payload.get("stage")
323
+ if isinstance(runtime_payload, dict)
324
+ else None
325
+ )
326
+
327
  return {
328
  "id": repo_id,
329
  "slug": repo_id,
 
351
  "models": _optional_str_list(models),
352
  "datasets": _optional_str_list(datasets),
353
  "subdomain": subdomain,
354
+ "runtime_stage": resolved_runtime_stage,
355
+ "runtime": runtime_payload,
356
  }
357
 
358
 
 
366
  created_at=getattr(row, "created_at", None),
367
  last_modified=getattr(row, "last_modified", None),
368
  pipeline_tag=getattr(row, "pipeline_tag", None),
369
+ num_params=_extract_num_params_from_object(row),
 
 
370
  private=getattr(row, "private", None),
371
  trending_score=getattr(row, "trending_score", None),
372
  tags=getattr(row, "tags", None),
 
379
  models=getattr(row, "models", None),
380
  datasets=getattr(row, "datasets", None),
381
  subdomain=getattr(row, "subdomain", None),
382
+ runtime=getattr(row, "runtime", None),
383
  )
384
 
385
 
 
398
  def _normalize_trending_row(
399
  repo: dict[str, Any], default_repo_type: str, rank: int | None = None
400
  ) -> dict[str, Any]:
 
 
 
 
 
401
  row = _build_repo_row(
402
  repo_id=repo.get("id"),
403
  repo_type=repo.get("type") or repo.get("repoType") or default_repo_type,
 
407
  created_at=repo.get("createdAt"),
408
  last_modified=repo.get("lastModified"),
409
  pipeline_tag=repo.get("pipeline_tag"),
410
+ num_params=_extract_num_params_from_dict(repo),
411
  private=repo.get("private"),
412
  trending_score=repo.get("trendingScore"),
413
  tags=repo.get("tags"),
 
420
  models=repo.get("models"),
421
  datasets=repo.get("datasets"),
422
  subdomain=repo.get("subdomain"),
423
+ runtime=repo.get("runtime"),
424
+ runtime_stage=repo.get("runtime_stage") or repo.get("runtimeStage"),
425
  )
426
  if rank is not None:
427
  row["trending_rank"] = rank
 
489
  created_at=row.get("createdAt") or row.get("created_at"),
490
  last_modified=row.get("lastModified") or row.get("last_modified"),
491
  pipeline_tag=row.get("pipeline_tag") or row.get("pipelineTag"),
492
+ num_params=_extract_num_params_from_dict(row),
493
  private=row.get("private"),
494
  tags=row.get("tags"),
495
  gated=row.get("gated"),
 
500
  models=row.get("models"),
501
  datasets=row.get("datasets"),
502
  subdomain=row.get("subdomain"),
503
+ runtime=row.get("runtime"),
504
+ runtime_stage=row.get("runtime_stage") or row.get("runtimeStage"),
505
  )
506
 
507
 
monty_api/query_entrypoints.py CHANGED
@@ -4,6 +4,8 @@ import argparse
4
  import asyncio
5
  import inspect
6
  import json
 
 
7
  import time
8
  from typing import Any, Callable
9
 
@@ -33,6 +35,25 @@ class MontyExecutionError(RuntimeError):
33
  self.trace = trace
34
 
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def _introspect_helper_signatures() -> dict[str, set[str]]:
37
  env = build_runtime_helper_environment(
38
  max_calls=DEFAULT_MAX_CALLS,
@@ -213,6 +234,12 @@ async def _execute_query(
213
  timeout_sec=timeout_sec,
214
  )
215
  )
 
 
 
 
 
 
216
  return await _run_with_monty(
217
  code=prepared_code,
218
  query=prepared_query,
 
4
  import asyncio
5
  import inspect
6
  import json
7
+ import os
8
+ import sys
9
  import time
10
  from typing import Any, Callable
11
 
 
35
  self.trace = trace
36
 
37
 
38
+ def _query_debug_enabled() -> bool:
39
+ value = os.environ.get("MONTY_DEBUG_QUERY", "")
40
+ return value.strip().lower() in {"1", "true", "yes", "on"}
41
+
42
+
43
def _log_generated_query(
    *, query: str, code: str, max_calls: int | None, timeout_sec: int | None
) -> None:
    """Write the prepared query, its budgets and the generated code to stderr.

    Does nothing unless ``_query_debug_enabled()`` returns True. Each section
    is printed on its own line(s) with a ``[monty-debug]`` label, and stderr is
    flushed at the end.
    """
    if not _query_debug_enabled():
        return
    # Each tuple is the positional argument list of one print() call.
    sections = (
        ("[monty-debug] query:",),
        (query,),
        ("[monty-debug] max_calls:", max_calls),
        ("[monty-debug] timeout_sec:", timeout_sec),
        ("[monty-debug] code:",),
        (code,),
    )
    for parts in sections:
        print(*parts, file=sys.stderr)
    sys.stderr.flush()
55
+
56
+
57
  def _introspect_helper_signatures() -> dict[str, set[str]]:
58
  env = build_runtime_helper_environment(
59
  max_calls=DEFAULT_MAX_CALLS,
 
234
  timeout_sec=timeout_sec,
235
  )
236
  )
237
+ _log_generated_query(
238
+ query=prepared_query,
239
+ code=prepared_code,
240
+ max_calls=prepared_max_calls,
241
+ timeout_sec=prepared_timeout,
242
+ )
243
  return await _run_with_monty(
244
  code=prepared_code,
245
  query=prepared_query,
monty_api/runtime_context.py CHANGED
@@ -60,6 +60,8 @@ from .runtime_filtering import (
60
  _project_activity_items,
61
  _project_actor_items,
62
  _project_collection_items,
 
 
63
  _project_daily_paper_items,
64
  _project_items,
65
  _project_repo_items,
@@ -215,6 +217,8 @@ for name, value in {
215
  "_project_items": _project_items,
216
  "_project_repo_items": _project_repo_items,
217
  "_project_collection_items": _project_collection_items,
 
 
218
  "_project_daily_paper_items": _project_daily_paper_items,
219
  "_project_user_items": _project_user_items,
220
  "_project_actor_items": _project_actor_items,
 
60
  _project_activity_items,
61
  _project_actor_items,
62
  _project_collection_items,
63
+ _project_discussion_detail_items,
64
+ _project_discussion_items,
65
  _project_daily_paper_items,
66
  _project_items,
67
  _project_repo_items,
 
217
  "_project_items": _project_items,
218
  "_project_repo_items": _project_repo_items,
219
  "_project_collection_items": _project_collection_items,
220
+ "_project_discussion_items": _project_discussion_items,
221
+ "_project_discussion_detail_items": _project_discussion_detail_items,
222
  "_project_daily_paper_items": _project_daily_paper_items,
223
  "_project_user_items": _project_user_items,
224
  "_project_actor_items": _project_actor_items,
monty_api/runtime_envelopes.py CHANGED
@@ -21,8 +21,8 @@ def _helper_meta(
21
  def _derive_limit_metadata(
22
  self: Any,
23
  *,
24
- requested_return_limit: int | None,
25
- applied_return_limit: int,
26
  default_limit_used: bool,
27
  requested_scan_limit: int | None = None,
28
  applied_scan_limit: int | None = None,
@@ -30,8 +30,8 @@ def _derive_limit_metadata(
30
  applied_max_pages: int | None = None,
31
  ) -> dict[str, Any]:
32
  meta: dict[str, Any] = {
33
- "requested_return_limit": requested_return_limit,
34
- "applied_return_limit": applied_return_limit,
35
  "default_limit_used": default_limit_used,
36
  }
37
  if requested_scan_limit is not None or applied_scan_limit is not None:
@@ -42,8 +42,8 @@ def _derive_limit_metadata(
42
  meta["requested_max_pages"] = requested_max_pages
43
  meta["applied_max_pages"] = applied_max_pages
44
  meta["page_limit_applied"] = requested_max_pages != applied_max_pages
45
- if requested_return_limit is not None:
46
- meta["hard_cap_applied"] = applied_return_limit < requested_return_limit
47
  return meta
48
 
49
 
@@ -68,9 +68,9 @@ def _derive_truncated_by(
68
  hard_cap: bool = False,
69
  scan_limit_hit: bool = False,
70
  page_limit_hit: bool = False,
71
- return_limit_hit: bool = False,
72
  ) -> str:
73
- causes = [hard_cap, scan_limit_hit, page_limit_hit, return_limit_hit]
74
  if sum(1 for cause in causes if cause) > 1:
75
  return "multiple"
76
  if hard_cap:
@@ -79,8 +79,8 @@ def _derive_truncated_by(
79
  return "scan_limit"
80
  if page_limit_hit:
81
  return "page_limit"
82
- if return_limit_hit:
83
- return "return_limit"
84
  return "none"
85
 
86
 
@@ -89,7 +89,7 @@ def _derive_can_request_more(
89
  ) -> bool:
90
  if sample_complete:
91
  return False
92
- return truncated_by in {"return_limit", "scan_limit", "page_limit", "multiple"}
93
 
94
 
95
  def _derive_next_request_hint(
@@ -97,12 +97,12 @@ def _derive_next_request_hint(
97
  *,
98
  truncated_by: str,
99
  more_available: bool | str,
100
- applied_return_limit: int,
101
  applied_scan_limit: int | None = None,
102
  applied_max_pages: int | None = None,
103
  ) -> str:
104
- if truncated_by == "return_limit":
105
- return f"Ask for return_limit>{applied_return_limit} to see more rows"
106
  if truncated_by == "scan_limit" and applied_scan_limit is not None:
107
  return f"Increase scan_limit above {applied_scan_limit} for broader coverage"
108
  if truncated_by == "page_limit" and applied_max_pages is not None:
@@ -121,28 +121,27 @@ def _derive_next_request_hint(
121
  def _resolve_exhaustive_limits(
122
  self: Any,
123
  *,
124
- return_limit: int | None,
125
  count_only: bool,
126
- default_return: int,
127
- max_return: int,
128
  scan_limit: int | None = None,
129
  scan_cap: int | None = None,
130
  ) -> dict[str, Any]:
131
- requested_return_limit = None if count_only else return_limit
132
- effective_requested_return_limit = 0 if count_only else requested_return_limit
133
  out: dict[str, Any] = {
134
- "requested_return_limit": requested_return_limit,
135
- "applied_return_limit": _clamp_int(
136
- effective_requested_return_limit,
137
- default=default_return,
138
  minimum=0,
139
- maximum=max_return,
140
  ),
141
- "default_limit_used": requested_return_limit is None and not count_only,
142
  }
143
  out["hard_cap_applied"] = (
144
- requested_return_limit is not None
145
- and out["applied_return_limit"] < requested_return_limit
146
  )
147
  if scan_cap is not None:
148
  out["requested_scan_limit"] = scan_limit
@@ -168,7 +167,7 @@ def _build_exhaustive_meta(
168
  applied_max_pages: int | None = None,
169
  ) -> dict[str, Any]:
170
  meta = dict(base_meta)
171
- applied_return_limit = int(limit_plan["applied_return_limit"])
172
  applied_scan_limit = limit_plan.get("applied_scan_limit")
173
  meta.update(
174
  {
@@ -186,7 +185,7 @@ def _build_exhaustive_meta(
186
  self,
187
  truncated_by=truncated_by,
188
  more_available=more_available,
189
- applied_return_limit=applied_return_limit,
190
  applied_scan_limit=applied_scan_limit
191
  if isinstance(applied_scan_limit, int)
192
  else None,
@@ -197,8 +196,8 @@ def _build_exhaustive_meta(
197
  meta.update(
198
  _derive_limit_metadata(
199
  self,
200
- requested_return_limit=limit_plan["requested_return_limit"],
201
- applied_return_limit=applied_return_limit,
202
  default_limit_used=bool(limit_plan["default_limit_used"]),
203
  requested_scan_limit=limit_plan.get("requested_scan_limit"),
204
  applied_scan_limit=applied_scan_limit
@@ -263,26 +262,26 @@ def _build_exhaustive_result_meta(
263
  requested_max_pages: int | None = None,
264
  applied_max_pages: int | None = None,
265
  ) -> dict[str, Any]:
266
- applied_return_limit = int(limit_plan["applied_return_limit"])
267
  if count_only:
268
  effective_sample_complete = exact_count
269
  else:
270
  effective_sample_complete = (
271
  sample_complete
272
  if isinstance(sample_complete, bool)
273
- else exact_count and matched_count <= applied_return_limit
274
  )
275
- return_limit_hit = (
276
  False
277
  if count_only
278
- else (applied_return_limit > 0 and matched_count > applied_return_limit)
279
  )
280
  truncated_by = _derive_truncated_by(
281
  self,
282
  hard_cap=bool(limit_plan.get("hard_cap_applied")),
283
  scan_limit_hit=scan_limit_hit,
284
  page_limit_hit=page_limit_hit,
285
- return_limit_hit=return_limit_hit,
286
  )
287
  truncated = truncated_by != "none" or truncated_extra
288
  total_value = _as_int(base_meta.get("total"))
 
21
  def _derive_limit_metadata(
22
  self: Any,
23
  *,
24
+ requested_limit: int | None,
25
+ applied_limit: int,
26
  default_limit_used: bool,
27
  requested_scan_limit: int | None = None,
28
  applied_scan_limit: int | None = None,
 
30
  applied_max_pages: int | None = None,
31
  ) -> dict[str, Any]:
32
  meta: dict[str, Any] = {
33
+ "requested_limit": requested_limit,
34
+ "applied_limit": applied_limit,
35
  "default_limit_used": default_limit_used,
36
  }
37
  if requested_scan_limit is not None or applied_scan_limit is not None:
 
42
  meta["requested_max_pages"] = requested_max_pages
43
  meta["applied_max_pages"] = applied_max_pages
44
  meta["page_limit_applied"] = requested_max_pages != applied_max_pages
45
+ if requested_limit is not None:
46
+ meta["hard_cap_applied"] = applied_limit < requested_limit
47
  return meta
48
 
49
 
 
68
  hard_cap: bool = False,
69
  scan_limit_hit: bool = False,
70
  page_limit_hit: bool = False,
71
+ limit_hit: bool = False,
72
  ) -> str:
73
+ causes = [hard_cap, scan_limit_hit, page_limit_hit, limit_hit]
74
  if sum(1 for cause in causes if cause) > 1:
75
  return "multiple"
76
  if hard_cap:
 
79
  return "scan_limit"
80
  if page_limit_hit:
81
  return "page_limit"
82
+ if limit_hit:
83
+ return "limit"
84
  return "none"
85
 
86
 
 
89
  ) -> bool:
90
  if sample_complete:
91
  return False
92
+ return truncated_by in {"limit", "scan_limit", "page_limit", "multiple"}
93
 
94
 
95
  def _derive_next_request_hint(
 
97
  *,
98
  truncated_by: str,
99
  more_available: bool | str,
100
+ applied_limit: int,
101
  applied_scan_limit: int | None = None,
102
  applied_max_pages: int | None = None,
103
  ) -> str:
104
+ if truncated_by == "limit":
105
+ return f"Ask for limit>{applied_limit} to see more rows"
106
  if truncated_by == "scan_limit" and applied_scan_limit is not None:
107
  return f"Increase scan_limit above {applied_scan_limit} for broader coverage"
108
  if truncated_by == "page_limit" and applied_max_pages is not None:
 
121
  def _resolve_exhaustive_limits(
122
  self: Any,
123
  *,
124
+ limit: int | None,
125
  count_only: bool,
126
+ default_limit: int,
127
+ max_limit: int,
128
  scan_limit: int | None = None,
129
  scan_cap: int | None = None,
130
  ) -> dict[str, Any]:
131
+ requested_limit = None if count_only else limit
132
+ effective_requested_limit = 0 if count_only else requested_limit
133
  out: dict[str, Any] = {
134
+ "requested_limit": requested_limit,
135
+ "applied_limit": _clamp_int(
136
+ effective_requested_limit,
137
+ default=default_limit,
138
  minimum=0,
139
+ maximum=max_limit,
140
  ),
141
+ "default_limit_used": requested_limit is None and not count_only,
142
  }
143
  out["hard_cap_applied"] = (
144
+ requested_limit is not None and out["applied_limit"] < requested_limit
 
145
  )
146
  if scan_cap is not None:
147
  out["requested_scan_limit"] = scan_limit
 
167
  applied_max_pages: int | None = None,
168
  ) -> dict[str, Any]:
169
  meta = dict(base_meta)
170
+ applied_limit = int(limit_plan["applied_limit"])
171
  applied_scan_limit = limit_plan.get("applied_scan_limit")
172
  meta.update(
173
  {
 
185
  self,
186
  truncated_by=truncated_by,
187
  more_available=more_available,
188
+ applied_limit=applied_limit,
189
  applied_scan_limit=applied_scan_limit
190
  if isinstance(applied_scan_limit, int)
191
  else None,
 
196
  meta.update(
197
  _derive_limit_metadata(
198
  self,
199
+ requested_limit=limit_plan["requested_limit"],
200
+ applied_limit=applied_limit,
201
  default_limit_used=bool(limit_plan["default_limit_used"]),
202
  requested_scan_limit=limit_plan.get("requested_scan_limit"),
203
  applied_scan_limit=applied_scan_limit
 
262
  requested_max_pages: int | None = None,
263
  applied_max_pages: int | None = None,
264
  ) -> dict[str, Any]:
265
+ applied_limit = int(limit_plan["applied_limit"])
266
  if count_only:
267
  effective_sample_complete = exact_count
268
  else:
269
  effective_sample_complete = (
270
  sample_complete
271
  if isinstance(sample_complete, bool)
272
+ else exact_count and matched_count <= applied_limit
273
  )
274
+ limit_hit = (
275
  False
276
  if count_only
277
+ else (applied_limit > 0 and matched_count > applied_limit)
278
  )
279
  truncated_by = _derive_truncated_by(
280
  self,
281
  hard_cap=bool(limit_plan.get("hard_cap_applied")),
282
  scan_limit_hit=scan_limit_hit,
283
  page_limit_hit=page_limit_hit,
284
+ limit_hit=limit_hit,
285
  )
286
  truncated = truncated_by != "none" or truncated_extra
287
  total_value = _as_int(base_meta.get("total"))
monty_api/runtime_filtering.py CHANGED
@@ -2,40 +2,48 @@ from __future__ import annotations
2
 
3
  from typing import Any
4
 
5
- from .aliases import (
6
- ACTIVITY_FIELD_ALIASES,
7
- ACTOR_FIELD_ALIASES,
8
- COLLECTION_FIELD_ALIASES,
9
- DAILY_PAPER_FIELD_ALIASES,
10
- REPO_FIELD_ALIASES,
11
- USER_FIELD_ALIASES,
12
- USER_LIKES_FIELD_ALIASES,
 
 
13
  )
14
  from .http_runtime import _as_int
15
 
16
 
 
 
 
 
17
  def _project_items(
18
  self: Any,
19
  items: list[dict[str, Any]],
20
  fields: list[str] | None,
21
- aliases: dict[str, str] | None = None,
 
22
  ) -> list[dict[str, Any]]:
23
  if not isinstance(fields, list) or not fields:
24
  return items
25
  wanted = [str(field).strip() for field in fields if str(field).strip()]
26
  if not wanted:
27
  return items
28
- alias_map = {
29
- str(key).strip().lower(): str(value).strip()
30
- for key, value in (aliases or {}).items()
31
- if str(key).strip() and str(value).strip()
32
- }
 
 
33
  projected: list[dict[str, Any]] = []
34
  for row in items:
35
  out: dict[str, Any] = {}
36
  for key in wanted:
37
- source_key = alias_map.get(key.lower(), key)
38
- value = row.get(source_key)
39
  if value is None:
40
  continue
41
  out[key] = value
@@ -46,63 +54,88 @@ def _project_items(
46
  def _project_repo_items(
47
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
48
  ) -> list[dict[str, Any]]:
49
- return _project_items(self, items, fields, aliases=REPO_FIELD_ALIASES)
50
 
51
 
52
  def _project_collection_items(
53
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
54
  ) -> list[dict[str, Any]]:
55
- return _project_items(self, items, fields, aliases=COLLECTION_FIELD_ALIASES)
 
 
56
 
57
 
58
  def _project_daily_paper_items(
59
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
60
  ) -> list[dict[str, Any]]:
61
- return _project_items(self, items, fields, aliases=DAILY_PAPER_FIELD_ALIASES)
 
 
62
 
63
 
64
  def _project_user_items(
65
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
66
  ) -> list[dict[str, Any]]:
67
- return _project_items(self, items, fields, aliases=USER_FIELD_ALIASES)
68
 
69
 
70
  def _project_actor_items(
71
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
72
  ) -> list[dict[str, Any]]:
73
- return _project_items(self, items, fields, aliases=ACTOR_FIELD_ALIASES)
74
 
75
 
76
  def _project_user_like_items(
77
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
78
  ) -> list[dict[str, Any]]:
79
- return _project_items(self, items, fields, aliases=USER_LIKES_FIELD_ALIASES)
 
 
80
 
81
 
82
  def _project_activity_items(
83
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
84
  ) -> list[dict[str, Any]]:
85
- return _project_items(self, items, fields, aliases=ACTIVITY_FIELD_ALIASES)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
 
88
  def _normalize_where(
89
  self: Any,
90
  where: dict[str, Any] | None,
91
- aliases: dict[str, str] | None = None,
 
92
  ) -> dict[str, Any] | None:
93
  if not isinstance(where, dict) or not where:
94
  return where
95
- alias_map = {
96
- str(key).strip().lower(): str(value).strip()
97
- for key, value in (aliases or {}).items()
98
- if str(key).strip() and str(value).strip()
99
- }
100
  normalized: dict[str, Any] = {}
101
  for key, value in where.items():
102
  raw_key = str(key).strip()
103
  if not raw_key:
104
  continue
105
- normalized[alias_map.get(raw_key.lower(), raw_key)] = value
 
 
 
 
106
  return normalized
107
 
108
 
@@ -161,9 +194,9 @@ def _apply_where(
161
  items: list[dict[str, Any]],
162
  where: dict[str, Any] | None,
163
  *,
164
- aliases: dict[str, str] | None = None,
165
  ) -> list[dict[str, Any]]:
166
- normalized_where = _normalize_where(self, where, aliases=aliases)
167
  if not isinstance(normalized_where, dict) or not normalized_where:
168
  return items
169
  return [row for row in items if _item_matches_where(self, row, normalized_where)]
 
2
 
3
  from typing import Any
4
 
5
+ from .constants import (
6
+ ACTIVITY_CANONICAL_FIELDS,
7
+ ACTOR_CANONICAL_FIELDS,
8
+ COLLECTION_CANONICAL_FIELDS,
9
+ DAILY_PAPER_CANONICAL_FIELDS,
10
+ DISCUSSION_CANONICAL_FIELDS,
11
+ DISCUSSION_DETAIL_CANONICAL_FIELDS,
12
+ REPO_CANONICAL_FIELDS,
13
+ USER_CANONICAL_FIELDS,
14
+ USER_LIKES_CANONICAL_FIELDS,
15
  )
16
  from .http_runtime import _as_int
17
 
18
 
19
+ def _allowed_field_set(allowed_fields: tuple[str, ...] | list[str] | set[str]) -> set[str]:
20
+ return {str(field).strip() for field in allowed_fields if str(field).strip()}
21
+
22
+
23
  def _project_items(
24
  self: Any,
25
  items: list[dict[str, Any]],
26
  fields: list[str] | None,
27
+ *,
28
+ allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
29
  ) -> list[dict[str, Any]]:
30
  if not isinstance(fields, list) or not fields:
31
  return items
32
  wanted = [str(field).strip() for field in fields if str(field).strip()]
33
  if not wanted:
34
  return items
35
+ if allowed_fields is not None:
36
+ allowed = _allowed_field_set(allowed_fields)
37
+ invalid = sorted(field for field in wanted if field not in allowed)
38
+ if invalid:
39
+ raise ValueError(
40
+ f"Unsupported fields {invalid}. Allowed fields: {sorted(allowed)}"
41
+ )
42
  projected: list[dict[str, Any]] = []
43
  for row in items:
44
  out: dict[str, Any] = {}
45
  for key in wanted:
46
+ value = row.get(key)
 
47
  if value is None:
48
  continue
49
  out[key] = value
 
54
  def _project_repo_items(
55
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
56
  ) -> list[dict[str, Any]]:
57
+ return _project_items(self, items, fields, allowed_fields=REPO_CANONICAL_FIELDS)
58
 
59
 
60
  def _project_collection_items(
61
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
62
  ) -> list[dict[str, Any]]:
63
+ return _project_items(
64
+ self, items, fields, allowed_fields=COLLECTION_CANONICAL_FIELDS
65
+ )
66
 
67
 
68
  def _project_daily_paper_items(
69
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
70
  ) -> list[dict[str, Any]]:
71
+ return _project_items(
72
+ self, items, fields, allowed_fields=DAILY_PAPER_CANONICAL_FIELDS
73
+ )
74
 
75
 
76
  def _project_user_items(
77
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
78
  ) -> list[dict[str, Any]]:
79
+ return _project_items(self, items, fields, allowed_fields=USER_CANONICAL_FIELDS)
80
 
81
 
82
  def _project_actor_items(
83
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
84
  ) -> list[dict[str, Any]]:
85
+ return _project_items(self, items, fields, allowed_fields=ACTOR_CANONICAL_FIELDS)
86
 
87
 
88
  def _project_user_like_items(
89
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
90
  ) -> list[dict[str, Any]]:
91
+ return _project_items(
92
+ self, items, fields, allowed_fields=USER_LIKES_CANONICAL_FIELDS
93
+ )
94
 
95
 
96
  def _project_activity_items(
97
  self: Any, items: list[dict[str, Any]], fields: list[str] | None
98
  ) -> list[dict[str, Any]]:
99
+ return _project_items(
100
+ self, items, fields, allowed_fields=ACTIVITY_CANONICAL_FIELDS
101
+ )
102
+
103
+
104
def _project_discussion_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project discussion rows onto ``fields``, allowing only ``DISCUSSION_CANONICAL_FIELDS``."""
    permitted = DISCUSSION_CANONICAL_FIELDS
    return _project_items(self, items, fields, allowed_fields=permitted)
110
+
111
+
112
def _project_discussion_detail_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project discussion-detail rows onto ``fields``, allowing only ``DISCUSSION_DETAIL_CANONICAL_FIELDS``."""
    permitted = DISCUSSION_DETAIL_CANONICAL_FIELDS
    return _project_items(self, items, fields, allowed_fields=permitted)
118
 
119
 
120
  def _normalize_where(
121
  self: Any,
122
  where: dict[str, Any] | None,
123
+ *,
124
+ allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
125
  ) -> dict[str, Any] | None:
126
  if not isinstance(where, dict) or not where:
127
  return where
128
+ allowed = _allowed_field_set(allowed_fields) if allowed_fields is not None else None
 
 
 
 
129
  normalized: dict[str, Any] = {}
130
  for key, value in where.items():
131
  raw_key = str(key).strip()
132
  if not raw_key:
133
  continue
134
+ if allowed is not None and raw_key not in allowed:
135
+ raise ValueError(
136
+ f"Unsupported filter fields {[raw_key]}. Allowed fields: {sorted(allowed)}"
137
+ )
138
+ normalized[raw_key] = value
139
  return normalized
140
 
141
 
 
194
  items: list[dict[str, Any]],
195
  where: dict[str, Any] | None,
196
  *,
197
+ allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
198
  ) -> list[dict[str, Any]]:
199
+ normalized_where = _normalize_where(self, where, allowed_fields=allowed_fields)
200
  if not isinstance(normalized_where, dict) or not normalized_where:
201
  return items
202
  return [row for row in items if _item_matches_where(self, row, normalized_where)]
monty_api/tool_entrypoints.py CHANGED
@@ -5,6 +5,7 @@ from __future__ import annotations
5
 
6
  import sys
7
  from pathlib import Path
 
8
 
9
  _PACKAGE_DIR = Path(__file__).resolve().parent
10
  _ROOT_DIR = _PACKAGE_DIR.parent
@@ -13,7 +14,40 @@ for candidate in (_ROOT_DIR, _PACKAGE_DIR):
13
  if candidate_str not in sys.path:
14
  sys.path.insert(0, candidate_str)
15
 
16
- from monty_api import HELPER_EXTERNALS, hf_hub_query, hf_hub_query_raw, main # noqa: E402
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  __all__ = [
19
  "HELPER_EXTERNALS",
 
5
 
6
  import sys
7
  from pathlib import Path
8
+ from typing import Any
9
 
10
  _PACKAGE_DIR = Path(__file__).resolve().parent
11
  _ROOT_DIR = _PACKAGE_DIR.parent
 
14
  if candidate_str not in sys.path:
15
  sys.path.insert(0, candidate_str)
16
 
17
+ from monty_api import ( # noqa: E402
18
+ HELPER_EXTERNALS,
19
+ hf_hub_query as _hf_hub_query,
20
+ hf_hub_query_raw as _hf_hub_query_raw,
21
+ main,
22
+ )
23
+
24
+
25
+ async def hf_hub_query(
26
+ query: str,
27
+ code: str,
28
+ max_calls: int | None = None,
29
+ timeout_sec: int | None = None,
30
+ ) -> dict[str, Any]:
31
+ return await _hf_hub_query(
32
+ query=query,
33
+ code=code,
34
+ max_calls=max_calls,
35
+ timeout_sec=timeout_sec,
36
+ )
37
+
38
+
39
+ async def hf_hub_query_raw(
40
+ query: str,
41
+ code: str,
42
+ max_calls: int | None = None,
43
+ timeout_sec: int | None = None,
44
+ ) -> Any:
45
+ return await _hf_hub_query_raw(
46
+ query=query,
47
+ code=code,
48
+ max_calls=max_calls,
49
+ timeout_sec=timeout_sec,
50
+ )
51
 
52
  __all__ = [
53
  "HELPER_EXTERNALS",
monty_api/validation.py CHANGED
@@ -155,8 +155,8 @@ def _summarize_limit_hit(helper_name: str, result: Any) -> dict[str, Any] | None
155
  "truncated": meta.get("truncated"),
156
  "truncated_by": meta.get("truncated_by"),
157
  "more_available": meta.get("more_available"),
158
- "requested_return_limit": meta.get("requested_return_limit"),
159
- "applied_return_limit": meta.get("applied_return_limit"),
160
  "next_request_hint": meta.get("next_request_hint"),
161
  }
162
  if meta.get("scan_limit") is not None:
 
155
  "truncated": meta.get("truncated"),
156
  "truncated_by": meta.get("truncated_by"),
157
  "more_available": meta.get("more_available"),
158
+ "requested_limit": meta.get("requested_limit"),
159
+ "applied_limit": meta.get("applied_limit"),
160
  "next_request_hint": meta.get("next_request_hint"),
161
  }
162
  if meta.get("scan_limit") is not None:
tool_entrypoints.py CHANGED
@@ -5,6 +5,7 @@ from __future__ import annotations
5
 
6
  import sys
7
  from pathlib import Path
 
8
 
9
  _PACKAGE_DIR = Path(__file__).resolve().parent
10
  _ROOT_DIR = _PACKAGE_DIR.parent
@@ -13,7 +14,40 @@ for candidate in (_ROOT_DIR, _PACKAGE_DIR):
13
  if candidate_str not in sys.path:
14
  sys.path.insert(0, candidate_str)
15
 
16
- from monty_api import HELPER_EXTERNALS, hf_hub_query, hf_hub_query_raw, main # noqa: E402
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  __all__ = [
19
  "HELPER_EXTERNALS",
 
5
 
6
  import sys
7
  from pathlib import Path
8
+ from typing import Any
9
 
10
  _PACKAGE_DIR = Path(__file__).resolve().parent
11
  _ROOT_DIR = _PACKAGE_DIR.parent
 
14
  if candidate_str not in sys.path:
15
  sys.path.insert(0, candidate_str)
16
 
17
+ from monty_api import ( # noqa: E402
18
+ HELPER_EXTERNALS,
19
+ hf_hub_query as _hf_hub_query,
20
+ hf_hub_query_raw as _hf_hub_query_raw,
21
+ main,
22
+ )
23
+
24
+
25
+ async def hf_hub_query(
26
+ query: str,
27
+ code: str,
28
+ max_calls: int | None = None,
29
+ timeout_sec: int | None = None,
30
+ ) -> dict[str, Any]:
31
+ return await _hf_hub_query(
32
+ query=query,
33
+ code=code,
34
+ max_calls=max_calls,
35
+ timeout_sec=timeout_sec,
36
+ )
37
+
38
+
39
+ async def hf_hub_query_raw(
40
+ query: str,
41
+ code: str,
42
+ max_calls: int | None = None,
43
+ timeout_sec: int | None = None,
44
+ ) -> Any:
45
+ return await _hf_hub_query_raw(
46
+ query=query,
47
+ code=code,
48
+ max_calls=max_calls,
49
+ timeout_sec=timeout_sec,
50
+ )
51
 
52
  __all__ = [
53
  "HELPER_EXTERNALS",