lewtun HF Staff committed on
Commit
5db99fa
·
unverified ·
1 Parent(s): 7867a7a

Expose OpenAI models in the UI & make Claude model picker configurable (#183)

Browse files

* Add agent dev server notes

* Make frontend model configurable

* Support env-selected frontend models

* Use Claude-specific model env var

* Add GPT-5.5 to web model picker

* Gate GPT-5.5 as a premium model

* Avoid duplicate session model fetch

* Remove legacy Claude quota aliases

* Document GitHub CLI PR body workflow

* Gate only deployed paid model IDs

* Nits

AGENTS.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agent Notes
2
+
3
+ ## Local Dev Servers
4
+
5
+ - Frontend: from `frontend/`, run `npm ci` if dependencies are missing, then `npm run dev`.
6
+ - Backend: from `backend/`, run `uv run uvicorn main:app --host ::1 --port 7860`.
7
+ - Frontend URL: http://localhost:5173/
8
+ - Backend health check: `curl -g http://[::1]:7860/api`
9
+ - Frontend proxy health check: `curl http://localhost:5173/api`
10
+
11
+ Notes:
12
+
13
+ - Vite proxies `/api` and `/auth` to `http://localhost:7860`.
14
+ - If `127.0.0.1:7860` is already owned by another local process, binding the backend to `::1` lets the Vite proxy resolve `localhost` cleanly.
15
+ - Prefer `npm ci` over `npm install` for setup, since `npm install` may rewrite `frontend/package-lock.json` metadata depending on npm version.
16
+ - Production defaults to the Bedrock Claude model. For local development with a personal Anthropic key, set `ANTHROPIC_API_KEY` and `ML_INTERN_CLAUDE_MODEL_ID=anthropic/claude-opus-4-6` before starting the backend. Other models are selected through the app's model switcher.
17
+
18
+ ## GitHub CLI
19
+
20
+ - For multiline PR descriptions, prefer `gh pr edit <number> --body-file <file>` over inline `--body` so shell quoting, `$` env-var names, backticks, and newlines are preserved correctly.
backend/dependencies.py CHANGED
@@ -111,7 +111,7 @@ async def _fetch_user_plan(token: str) -> str:
111
 
112
  # OAuth whoami sets `type: "user"` and surfaces Pro via the `isPro` boolean
113
  # — see Space discussion #21. HF-Jobs eligibility (PR #172) ignores plan
114
- # entirely; the Claude daily-cap tier is still a free vs pro/org split.
115
  if whoami.get("isPro") is True or whoami.get("is_pro") is True:
116
  return "pro"
117
  plan_str = ""
 
111
 
112
  # OAuth whoami sets `type: "user"` and surfaces Pro via the `isPro` boolean
113
  # — see Space discussion #21. HF-Jobs eligibility (PR #172) ignores plan
114
+ # entirely; the premium-model daily-cap tier is still a free vs pro/org split.
115
  if whoami.get("isPro") is True or whoami.get("is_pro") is True:
116
  return "pro"
117
  plan_str = ""
backend/routes/agent.py CHANGED
@@ -41,83 +41,111 @@ logger = logging.getLogger(__name__)
41
 
42
  router = APIRouter(prefix="/api", tags=["agent"])
43
 
44
- AVAILABLE_MODELS = [
45
- {
46
- "id": "moonshotai/Kimi-K2.6",
47
- "label": "Kimi K2.6",
48
- "provider": "huggingface",
49
- "tier": "free",
50
- "recommended": True,
51
- },
52
- {
53
- "id": "bedrock/us.anthropic.claude-opus-4-6-v1",
54
- "label": "Claude Opus 4.6",
55
- "provider": "anthropic",
56
- "tier": "pro",
57
- "recommended": True,
58
- },
59
- {
60
- "id": "MiniMaxAI/MiniMax-M2.7",
61
- "label": "MiniMax M2.7",
62
- "provider": "huggingface",
63
- "tier": "free",
64
- },
65
- {
66
- "id": "zai-org/GLM-5.1",
67
- "label": "GLM 5.1",
68
- "provider": "huggingface",
69
- "tier": "free",
70
- },
71
- ]
72
-
73
-
74
- def _is_anthropic_model(model_id: str) -> bool:
75
- return "anthropic" in model_id
76
-
77
-
78
- async def _require_hf_for_anthropic(request: Request, model_id: str) -> None:
79
- """403 if a non-``huggingface``-org user tries to select an Anthropic model.
80
-
81
- Anthropic models are billed to the Space's ``ANTHROPIC_API_KEY``; every
82
- other model in ``AVAILABLE_MODELS`` is routed through HF Router and
83
- billed via ``X-HF-Bill-To``. The gate only fires for Anthropic so
84
- non-HF users can still freely switch between the free models.
85
-
86
- Pattern: https://github.com/huggingface/ml-intern/pull/63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  """
88
- if not _is_anthropic_model(model_id):
89
  return
90
  if not await require_huggingface_org_member(request):
91
  raise HTTPException(
92
  status_code=403,
93
  detail={
94
- "error": "anthropic_restricted",
95
  "message": (
96
- "Opus is gated to HF staff. Pick a free model — "
97
  "Kimi K2.6, MiniMax M2.7, or GLM 5.1 — instead."
98
  ),
99
  },
100
  )
101
 
102
 
103
- async def _enforce_claude_quota(
104
  user: dict[str, Any],
105
  agent_session: AgentSession,
106
  ) -> None:
107
- """Charge the user's daily Claude quota on first use of Anthropic in a session.
108
 
109
  Runs at *message-submit* time, not session-create time — so spinning up a
110
- Claude session to look around doesn't burn quota. The ``claude_counted``
111
- flag on ``AgentSession`` guards against re-counting the same session.
 
112
 
113
- No-ops when the session's current model isn't Anthropic, or when this
114
  session has already been charged. Raises 429 when the user has hit
115
  their daily cap.
116
  """
117
  if agent_session.claude_counted:
118
  return
119
  model_name = agent_session.session.config.model_name
120
- if not _is_anthropic_model(model_name):
121
  return
122
  user_id = user["user_id"]
123
  cap = user_quotas.daily_cap_for(user.get("plan"))
@@ -126,11 +154,11 @@ async def _enforce_claude_quota(
126
  raise HTTPException(
127
  status_code=429,
128
  detail={
129
- "error": "claude_daily_cap",
130
  "plan": user.get("plan", "free"),
131
  "cap": cap,
132
  "message": (
133
- "Daily Claude limit reached. Upgrade to HF Pro for "
134
  f"{user_quotas.CLAUDE_PRO_DAILY}/day or use a free model."
135
  ),
136
  },
@@ -306,8 +334,8 @@ async def create_session(
306
  behalf of the user.
307
 
308
  Optional body ``{"model"?: <id>}`` selects the session's LLM; unknown
309
- ids are rejected (400). The Claude-quota gate runs at message-submit
310
- time, not here — spinning up an Opus session to look around is free.
311
 
312
  Returns 503 if the server or user has reached the session limit.
313
  """
@@ -327,10 +355,9 @@ async def create_session(
327
  if model and model not in valid_ids:
328
  raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
329
 
330
- # Opus is gated to HF staff (PR #63). Only fires when the resolved model
331
- # is Anthropic; free models pass through.
332
  resolved_model = model or session_manager.config.model_name
333
- await _require_hf_for_anthropic(request, resolved_model)
334
 
335
  try:
336
  session_id = await session_manager.create_session(
@@ -355,7 +382,7 @@ async def restore_session_summary(
355
  session's context as a user-role system note.
356
 
357
  Optional ``"model"`` in the body overrides the session's LLM. The
358
- Claude-quota gate runs at message-submit time, not here.
359
  """
360
  messages = body.get("messages")
361
  if not isinstance(messages, list) or not messages:
@@ -369,7 +396,7 @@ async def restore_session_summary(
369
  raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
370
 
371
  resolved_model = model or session_manager.config.model_name
372
- await _require_hf_for_anthropic(request, resolved_model)
373
 
374
  try:
375
  session_id = await session_manager.create_session(
@@ -417,10 +444,10 @@ async def set_session_model(
417
 
418
  Takes effect on the next LLM call in that session — other sessions
419
  (including other browser tabs) are unaffected. Model switches don't
420
- charge quota — the Claude-quota gate only fires at message-submit time.
421
 
422
- Switching TO an Anthropic model requires HF org membership (PR #63);
423
- free-model switches are unrestricted.
424
  """
425
  agent_session = await _check_session_access(session_id, user, request)
426
  model_id = body.get("model")
@@ -429,7 +456,7 @@ async def set_session_model(
429
  valid_ids = {m["id"] for m in AVAILABLE_MODELS}
430
  if model_id not in valid_ids:
431
  raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
432
- await _require_hf_for_anthropic(request, model_id)
433
  if not agent_session:
434
  raise HTTPException(status_code=404, detail="Session not found")
435
  await session_manager.update_session_model(session_id, model_id)
@@ -463,15 +490,16 @@ async def set_session_notifications(
463
 
464
  @router.get("/user/quota")
465
  async def get_user_quota(user: dict = Depends(get_current_user)) -> dict:
466
- """Return the user's plan tier and today's Claude-session quota state."""
467
  plan = user.get("plan", "free")
468
  used = await user_quotas.get_claude_used_today(user["user_id"])
469
  cap = user_quotas.daily_cap_for(plan)
 
470
  return {
471
  "plan": plan,
472
- "claude_used_today": used,
473
- "claude_daily_cap": cap,
474
- "claude_remaining": max(0, cap - used),
475
  }
476
 
477
 
@@ -518,7 +546,7 @@ async def submit_input(
518
  ) -> dict:
519
  """Submit user input to a session. Only accessible by the session owner."""
520
  agent_session = await _check_session_access(request.session_id, user)
521
- await _enforce_claude_quota(user, agent_session)
522
  success = await session_manager.submit_user_input(request.session_id, request.text)
523
  if not success:
524
  raise HTTPException(status_code=404, detail="Session not found or inactive")
@@ -570,12 +598,12 @@ async def chat_sse(
570
  text = body.get("text")
571
  approvals = body.get("approvals")
572
 
573
- # Gate user-message sends against the daily Claude quota. Approvals are
574
  # continuations of an in-progress turn — the session was already charged
575
  # on its first message, so we skip the gate there.
576
  if text is not None and not approvals:
577
  try:
578
- await _enforce_claude_quota(user, agent_session)
579
  except HTTPException:
580
  broadcaster.unsubscribe(sub_id)
581
  raise
 
41
 
42
  router = APIRouter(prefix="/api", tags=["agent"])
43
 
44
# Production Claude model ID (Bedrock-hosted). Used both as the fallback for
# the Claude picker entry and as one of the gated, service-billed model IDs.
DEFAULT_CLAUDE_MODEL_ID = "bedrock/us.anthropic.claude-opus-4-6-v1"

# Exact model IDs that are gated (HF-org membership check and daily quota).
# Membership is by exact ID, so a locally configured direct-provider Claude ID
# is deliberately not gated. A frozenset keeps this allow-list immutable so it
# cannot be altered by accident at runtime.
GATED_MODEL_IDS: frozenset[str] = frozenset(
    {
        DEFAULT_CLAUDE_MODEL_ID,
        "openai/gpt-5.5",
    }
)
49
+
50
+
51
def _claude_picker_model_id() -> str:
    """Return the model ID that backs the Claude entry in the UI picker.

    The value is read from ``session_manager.config.model_name``. The
    frontend config populates that field from ``ML_INTERN_CLAUDE_MODEL_ID``
    when the env var is set and otherwise falls back to the production
    Bedrock Claude model. Only the Claude entry is driven by this value;
    the other models in the switcher are listed with fixed IDs.
    """
    configured_model_id = session_manager.config.model_name
    return configured_model_id
61
+
62
+
63
def _available_models() -> list[dict[str, Any]]:
    """Build the model list shown in the model switcher.

    Each entry carries ``id``, ``label``, ``provider`` and ``tier``, plus an
    optional ``recommended`` flag. The Claude entry takes its ``id`` from
    ``_claude_picker_model_id()``; every other entry uses a fixed ID.
    """
    kimi = {
        "id": "moonshotai/Kimi-K2.6",
        "label": "Kimi K2.6",
        "provider": "huggingface",
        "tier": "free",
        "recommended": True,
    }
    claude = {
        "id": _claude_picker_model_id(),
        "label": "Claude Opus 4.6",
        "provider": "anthropic",
        "tier": "pro",
        "recommended": True,
    }
    gpt = {
        "id": "openai/gpt-5.5",
        "label": "GPT-5.5",
        "provider": "openai",
        "tier": "pro",
    }
    minimax = {
        "id": "MiniMaxAI/MiniMax-M2.7",
        "label": "MiniMax M2.7",
        "provider": "huggingface",
        "tier": "free",
    }
    glm = {
        "id": "zai-org/GLM-5.1",
        "label": "GLM 5.1",
        "provider": "huggingface",
        "tier": "free",
    }
    # Order is the order in which the entries are listed.
    return [kimi, claude, gpt, minimax, glm]


# Snapshot taken once at import time, so the Claude entry reflects the value
# of ``session_manager.config.model_name`` when this module is loaded.
AVAILABLE_MODELS = _available_models()
102
+
103
+
104
def _is_gated_model(model_id: str) -> bool:
    """Return True when ``model_id`` is one of the exact IDs in ``GATED_MODEL_IDS``."""
    is_gated = model_id in GATED_MODEL_IDS
    return is_gated
106
+
107
+
108
async def _require_hf_for_gated_model(request: Request, model_id: str) -> None:
    """Reject gated-model selection by callers outside the ``huggingface`` org.

    Gated models are deployed paid endpoints backed by service-owned
    credentials. Models that are not gated pass straight through, so non-HF
    users can still switch freely between the free models.

    Raises:
        HTTPException: 403 with error code ``premium_model_restricted`` when
            ``model_id`` is gated and the org-membership check fails.
    """
    # Free (ungated) models never trigger the membership lookup.
    if not _is_gated_model(model_id):
        return

    is_org_member = await require_huggingface_org_member(request)
    if is_org_member:
        return

    restriction_detail = {
        "error": "premium_model_restricted",
        "message": (
            "Premium models are gated to HF staff. Pick a free model — "
            "Kimi K2.6, MiniMax M2.7, or GLM 5.1 — instead."
        ),
    }
    raise HTTPException(status_code=403, detail=restriction_detail)
128
 
129
 
130
+ async def _enforce_gated_model_quota(
131
  user: dict[str, Any],
132
  agent_session: AgentSession,
133
  ) -> None:
134
+ """Charge the user's daily gated-model quota on first use in a session.
135
 
136
  Runs at *message-submit* time, not session-create time — so spinning up a
137
+ gated-model session to look around doesn't burn quota. The
138
+ ``claude_counted`` flag on ``AgentSession`` guards against re-counting the
139
+ same session; the stored field name is kept for persistence compatibility.
140
 
141
+ No-ops when the session's current model isn't gated, or when this
142
  session has already been charged. Raises 429 when the user has hit
143
  their daily cap.
144
  """
145
  if agent_session.claude_counted:
146
  return
147
  model_name = agent_session.session.config.model_name
148
+ if not _is_gated_model(model_name):
149
  return
150
  user_id = user["user_id"]
151
  cap = user_quotas.daily_cap_for(user.get("plan"))
 
154
  raise HTTPException(
155
  status_code=429,
156
  detail={
157
+ "error": "premium_model_daily_cap",
158
  "plan": user.get("plan", "free"),
159
  "cap": cap,
160
  "message": (
161
+ "Daily premium model limit reached. Upgrade to HF Pro for "
162
  f"{user_quotas.CLAUDE_PRO_DAILY}/day or use a free model."
163
  ),
164
  },
 
334
  behalf of the user.
335
 
336
  Optional body ``{"model"?: <id>}`` selects the session's LLM; unknown
337
+ ids are rejected (400). The gated-model quota runs at message-submit
338
+ time, not here — spinning up a session to look around is free.
339
 
340
  Returns 503 if the server or user has reached the session limit.
341
  """
 
355
  if model and model not in valid_ids:
356
  raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
357
 
358
+ # Deployed paid models are gated to HF staff; free and local-dev models pass through.
 
359
  resolved_model = model or session_manager.config.model_name
360
+ await _require_hf_for_gated_model(request, resolved_model)
361
 
362
  try:
363
  session_id = await session_manager.create_session(
 
382
  session's context as a user-role system note.
383
 
384
  Optional ``"model"`` in the body overrides the session's LLM. The
385
+ gated-model quota runs at message-submit time, not here.
386
  """
387
  messages = body.get("messages")
388
  if not isinstance(messages, list) or not messages:
 
396
  raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
397
 
398
  resolved_model = model or session_manager.config.model_name
399
+ await _require_hf_for_gated_model(request, resolved_model)
400
 
401
  try:
402
  session_id = await session_manager.create_session(
 
444
 
445
  Takes effect on the next LLM call in that session — other sessions
446
  (including other browser tabs) are unaffected. Model switches don't
447
+ charge quota — the gated-model quota only fires at message-submit time.
448
 
449
+ Switching TO a gated deployed model requires HF org membership; free-model
450
+ and local-dev direct provider switches are unrestricted.
451
  """
452
  agent_session = await _check_session_access(session_id, user, request)
453
  model_id = body.get("model")
 
456
  valid_ids = {m["id"] for m in AVAILABLE_MODELS}
457
  if model_id not in valid_ids:
458
  raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
459
+ await _require_hf_for_gated_model(request, model_id)
460
  if not agent_session:
461
  raise HTTPException(status_code=404, detail="Session not found")
462
  await session_manager.update_session_model(session_id, model_id)
 
490
 
491
@router.get("/user/quota")
async def get_user_quota(user: dict = Depends(get_current_user)) -> dict:
    """Report the caller's plan tier and today's premium-model quota usage.

    The response holds ``plan``, ``premium_used_today``,
    ``premium_daily_cap`` and ``premium_remaining``; the remaining count is
    clamped so it never goes below zero.
    """
    tier = user.get("plan", "free")
    # The quota helper keeps its historical ``claude`` name.
    spent_today = await user_quotas.get_claude_used_today(user["user_id"])
    daily_limit = user_quotas.daily_cap_for(tier)
    return {
        "plan": tier,
        "premium_used_today": spent_today,
        "premium_daily_cap": daily_limit,
        "premium_remaining": max(0, daily_limit - spent_today),
    }
504
 
505
 
 
546
  ) -> dict:
547
  """Submit user input to a session. Only accessible by the session owner."""
548
  agent_session = await _check_session_access(request.session_id, user)
549
+ await _enforce_gated_model_quota(user, agent_session)
550
  success = await session_manager.submit_user_input(request.session_id, request.text)
551
  if not success:
552
  raise HTTPException(status_code=404, detail="Session not found or inactive")
 
598
  text = body.get("text")
599
  approvals = body.get("approvals")
600
 
601
+ # Gate user-message sends against the daily gated-model quota. Approvals are
602
  # continuations of an in-progress turn — the session was already charged
603
  # on its first message, so we skip the gate there.
604
  if text is not None and not approvals:
605
  try:
606
+ await _enforce_gated_model_quota(user, agent_session)
607
  except HTTPException:
608
  broadcaster.unsubscribe(sub_id)
609
  raise
backend/user_quotas.py CHANGED
@@ -1,12 +1,15 @@
1
- """Daily quota for Claude session creations.
2
 
3
- Tracks per-user Claude session starts against a daily cap derived from the
4
- user's HF plan. MongoDB is the source of truth when configured; the
5
  in-process dict remains the fallback for local/dev/test runs.
6
 
7
- Unit: session *creations*, not messages. A user who selects Claude in a new
8
- session consumes one quota point; switching an existing Claude session to
9
- Claude again doesn't (`AgentSession.claude_counted` guards that).
 
 
 
10
 
11
  Cap tiers:
12
  free user → CLAUDE_FREE_DAILY (1)
 
1
+ """Daily quota for premium model session creations.
2
 
3
+ Tracks per-user premium model session starts against a daily cap derived from
4
+ the user's HF plan. MongoDB is the source of truth when configured; the
5
  in-process dict remains the fallback for local/dev/test runs.
6
 
7
+ The public names still say ``claude`` because this quota bucket originally
8
+ only covered Claude and the persisted session field uses that name.
9
+
10
+ Unit: session *creations*, not messages. A user who sends with a premium model
11
+ in a new session consumes one quota point; switching an already-counted session
12
+ back to a premium model doesn't (`AgentSession.claude_counted` guards that).
13
 
14
  Cap tiers:
15
  free user → CLAUDE_FREE_DAILY (1)
configs/frontend_agent_config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "model_name": "bedrock/us.anthropic.claude-opus-4-6-v1",
3
  "save_sessions": true,
4
  "session_dataset_repo": "smolagents/ml-intern-sessions",
5
  "yolo_mode": false,
 
1
  {
2
+ "model_name": "${ML_INTERN_CLAUDE_MODEL_ID:-bedrock/us.anthropic.claude-opus-4-6-v1}",
3
  "save_sessions": true,
4
  "session_dataset_repo": "smolagents/ml-intern-sessions",
5
  "yolo_mode": false,
frontend/src/components/Chat/ChatInput.tsx CHANGED
@@ -8,7 +8,13 @@ import { useUserQuota } from '@/hooks/useUserQuota';
8
  import ClaudeCapDialog from '@/components/ClaudeCapDialog';
9
  import JobsUpgradeDialog from '@/components/JobsUpgradeDialog';
10
  import { useAgentStore } from '@/store/agentStore';
11
- import { CLAUDE_MODEL_PATH, FIRST_FREE_MODEL_PATH, isClaudePath } from '@/utils/model';
 
 
 
 
 
 
12
 
13
  // Model configuration
14
  interface ModelOption {
@@ -25,7 +31,7 @@ const getHfAvatarUrl = (modelId: string) => {
25
  return `https://huggingface.co/api/avatars/${org}`;
26
  };
27
 
28
- const MODEL_OPTIONS: ModelOption[] = [
29
  {
30
  id: 'kimi-k2.6',
31
  name: 'Kimi K2.6',
@@ -42,6 +48,13 @@ const MODEL_OPTIONS: ModelOption[] = [
42
  avatarUrl: 'https://huggingface.co/api/avatars/Anthropic',
43
  recommended: true,
44
  },
 
 
 
 
 
 
 
45
  {
46
  id: 'minimax-m2.7',
47
  name: 'MiniMax M2.7',
@@ -58,8 +71,8 @@ const MODEL_OPTIONS: ModelOption[] = [
58
  },
59
  ];
60
 
61
- const findModelByPath = (path: string): ModelOption | undefined => {
62
- return MODEL_OPTIONS.find(m => m.modelPath === path || path?.includes(m.id));
63
  };
64
 
65
  interface ChatInputProps {
@@ -72,16 +85,20 @@ interface ChatInputProps {
72
  }
73
 
74
  const isClaudeModel = (m: ModelOption) => isClaudePath(m.modelPath);
75
- const firstFreeModel = () => MODEL_OPTIONS.find(m => !isClaudeModel(m)) ?? MODEL_OPTIONS[0];
 
76
 
77
  export default function ChatInput({ sessionId, onSend, onStop, isProcessing = false, disabled = false, placeholder = 'Ask anything...' }: ChatInputProps) {
78
  const [input, setInput] = useState('');
79
  const inputRef = useRef<HTMLTextAreaElement>(null);
80
- const [selectedModelId, setSelectedModelId] = useState<string>(MODEL_OPTIONS[0].id);
 
 
 
81
  const [modelAnchorEl, setModelAnchorEl] = useState<null | HTMLElement>(null);
82
  const { quota, refresh: refreshQuota } = useUserQuota();
83
  // The daily-cap dialog is triggered from two places: (a) a 429 returned
84
- // from the chat transport when the user tries to send on Opus over cap —
85
  // surfaced via the agent-store flag — and (b) nothing else right now
86
  // (switching models is free). Keeping the open state in the store means
87
  // the hook layer can flip it without threading props through.
@@ -92,6 +109,41 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
92
  const [awaitingTopUp, setAwaitingTopUp] = useState(false);
93
  const lastSentRef = useRef<string>('');
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  // Model is per-session: fetch this tab's current model every time the
96
  // session changes. Other tabs keep their own selections independently.
97
  useEffect(() => {
@@ -102,7 +154,7 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
102
  .then((data) => {
103
  if (cancelled) return;
104
  if (data?.model) {
105
- const model = findModelByPath(data.model);
106
  if (model) setSelectedModelId(model.id);
107
  }
108
  })
@@ -110,7 +162,7 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
110
  return () => { cancelled = true; };
111
  }, [sessionId]);
112
 
113
- const selectedModel = MODEL_OPTIONS.find(m => m.id === selectedModelId) || MODEL_OPTIONS[0];
114
 
115
  // Auto-focus the textarea when the session becomes ready
116
  useEffect(() => {
@@ -127,7 +179,7 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
127
  }
128
  }, [input, disabled, onSend]);
129
 
130
- // When the chat transport reports a Claude-quota 429, restore the typed
131
  // text so the user doesn't lose their message.
132
  useEffect(() => {
133
  if (claudeQuotaExhausted && lastSentRef.current) {
@@ -178,12 +230,12 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
178
  }, [setClaudeQuotaExhausted]);
179
 
180
  // "Use a free model" — switch the current session to Kimi (or the first
181
- // non-Anthropic option) and auto-retry the send that tripped the cap.
182
  const handleUseFreeModel = useCallback(async () => {
183
  setClaudeQuotaExhausted(false);
184
  if (!sessionId) return;
185
- const free = MODEL_OPTIONS.find(m => m.modelPath === FIRST_FREE_MODEL_PATH)
186
- ?? firstFreeModel();
187
  try {
188
  const res = await apiFetch(`/api/session/${sessionId}/model`, {
189
  method: 'POST',
@@ -199,14 +251,14 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
199
  }
200
  }
201
  } catch { /* ignore */ }
202
- }, [sessionId, onSend, setClaudeQuotaExhausted]);
203
 
204
- const handleClaudeUpgradeClick = useCallback(async () => {
205
  if (!sessionId) return;
206
  try {
207
  await apiFetch(`/api/pro-click/${sessionId}`, {
208
  method: 'POST',
209
- body: JSON.stringify({ source: 'claude_cap_dialog', target: 'pro_pricing' }),
210
  });
211
  } catch {
212
  /* tracking is best-effort */
@@ -254,14 +306,14 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
254
  return () => document.removeEventListener('visibilitychange', onVisible);
255
  }, [awaitingTopUp, jobsUpgradeRequired, handleJobsRetry]);
256
 
257
- // Hide the chip until the user has actually burned quota — an unused
258
- // Opus session shouldn't populate a counter.
259
- const claudeChip = (() => {
260
- if (!quota || quota.claudeUsedToday === 0) return null;
261
  if (quota.plan === 'free') {
262
- return quota.claudeRemaining > 0 ? 'Free today' : 'Pro only';
263
  }
264
- return `${quota.claudeUsedToday}/${quota.claudeDailyCap} today`;
265
  })();
266
 
267
  return (
@@ -426,7 +478,7 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
426
  }
427
  }}
428
  >
429
- {MODEL_OPTIONS.map((model) => (
430
  <MenuItem
431
  key={model.id}
432
  onClick={() => handleSelectModel(model)}
@@ -462,9 +514,9 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
462
  }}
463
  />
464
  )}
465
- {isClaudeModel(model) && claudeChip && (
466
  <Chip
467
- label={claudeChip}
468
  size="small"
469
  sx={{
470
  height: '18px',
@@ -489,10 +541,10 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
489
  <ClaudeCapDialog
490
  open={claudeQuotaExhausted}
491
  plan={quota?.plan ?? 'free'}
492
- cap={quota?.claudeDailyCap ?? 1}
493
  onClose={handleCapDialogClose}
494
  onUseFreeModel={handleUseFreeModel}
495
- onUpgrade={handleClaudeUpgradeClick}
496
  />
497
  <JobsUpgradeDialog
498
  open={!!jobsUpgradeRequired}
 
8
  import ClaudeCapDialog from '@/components/ClaudeCapDialog';
9
  import JobsUpgradeDialog from '@/components/JobsUpgradeDialog';
10
  import { useAgentStore } from '@/store/agentStore';
11
+ import {
12
+ CLAUDE_MODEL_PATH,
13
+ FIRST_FREE_MODEL_PATH,
14
+ GPT_55_MODEL_PATH,
15
+ isClaudePath,
16
+ isPremiumPath,
17
+ } from '@/utils/model';
18
 
19
  // Model configuration
20
  interface ModelOption {
 
31
  return `https://huggingface.co/api/avatars/${org}`;
32
  };
33
 
34
+ const DEFAULT_MODEL_OPTIONS: ModelOption[] = [
35
  {
36
  id: 'kimi-k2.6',
37
  name: 'Kimi K2.6',
 
48
  avatarUrl: 'https://huggingface.co/api/avatars/Anthropic',
49
  recommended: true,
50
  },
51
+ {
52
+ id: 'gpt-5.5',
53
+ name: 'GPT-5.5',
54
+ description: 'OpenAI',
55
+ modelPath: GPT_55_MODEL_PATH,
56
+ avatarUrl: 'https://huggingface.co/api/avatars/openai',
57
+ },
58
  {
59
  id: 'minimax-m2.7',
60
  name: 'MiniMax M2.7',
 
71
  },
72
  ];
73
 
74
// Return the first option whose `modelPath` equals `path`, or whose short
// `id` occurs as a substring of `path`; `undefined` when nothing matches.
const findModelByPath = (path: string, options: ModelOption[]): ModelOption | undefined => {
  for (const option of options) {
    const exactMatch = option.modelPath === path;
    if (exactMatch || path?.includes(option.id)) {
      return option;
    }
  }
  return undefined;
};
77
 
78
  interface ChatInputProps {
 
85
  }
86
 
87
// Predicates over picker options, delegating to the path helpers from '@/utils/model'.
const isClaudeModel = ({ modelPath }: ModelOption) => isClaudePath(modelPath);
const isPremiumModel = ({ modelPath }: ModelOption) => isPremiumPath(modelPath);

// First non-premium option in `options`; falls back to the first entry when
// every option is premium.
const firstFreeModel = (options: ModelOption[]) => {
  const freeOption = options.find(option => !isPremiumModel(option));
  return freeOption ?? options[0];
};
90
 
91
  export default function ChatInput({ sessionId, onSend, onStop, isProcessing = false, disabled = false, placeholder = 'Ask anything...' }: ChatInputProps) {
92
  const [input, setInput] = useState('');
93
  const inputRef = useRef<HTMLTextAreaElement>(null);
94
+ const [modelOptions, setModelOptions] = useState<ModelOption[]>(DEFAULT_MODEL_OPTIONS);
95
+ const modelOptionsRef = useRef<ModelOption[]>(DEFAULT_MODEL_OPTIONS);
96
+ const sessionIdRef = useRef<string | undefined>(sessionId);
97
+ const [selectedModelId, setSelectedModelId] = useState<string>(DEFAULT_MODEL_OPTIONS[0].id);
98
  const [modelAnchorEl, setModelAnchorEl] = useState<null | HTMLElement>(null);
99
  const { quota, refresh: refreshQuota } = useUserQuota();
100
  // The daily-cap dialog is triggered from two places: (a) a 429 returned
101
+ // from the chat transport when the user tries to send on a premium model over cap —
102
  // surfaced via the agent-store flag — and (b) nothing else right now
103
  // (switching models is free). Keeping the open state in the store means
104
  // the hook layer can flip it without threading props through.
 
109
  const [awaitingTopUp, setAwaitingTopUp] = useState(false);
110
  const lastSentRef = useRef<string>('');
111
 
112
+ useEffect(() => {
113
+ modelOptionsRef.current = modelOptions;
114
+ }, [modelOptions]);
115
+
116
+ useEffect(() => {
117
+ sessionIdRef.current = sessionId;
118
+ }, [sessionId]);
119
+
120
+ useEffect(() => {
121
+ let cancelled = false;
122
+ apiFetch('/api/config/model')
123
+ .then((res) => (res.ok ? res.json() : null))
124
+ .then((data) => {
125
+ if (cancelled || !data?.available) return;
126
+ const claude = data.available.find((m: { provider?: string; id?: string }) => (
127
+ m.provider === 'anthropic' && m.id
128
+ ));
129
+ if (!claude?.id) return;
130
+
131
+ const next = DEFAULT_MODEL_OPTIONS.map((option) => (
132
+ isClaudeModel(option)
133
+ ? { ...option, modelPath: claude.id, name: claude.label ?? option.name }
134
+ : option
135
+ ));
136
+ modelOptionsRef.current = next;
137
+ setModelOptions(next);
138
+ if (!sessionIdRef.current) {
139
+ const current = data.current ? findModelByPath(data.current, next) : null;
140
+ if (current) setSelectedModelId(current.id);
141
+ }
142
+ })
143
+ .catch(() => { /* ignore */ });
144
+ return () => { cancelled = true; };
145
+ }, []);
146
+
147
  // Model is per-session: fetch this tab's current model every time the
148
  // session changes. Other tabs keep their own selections independently.
149
  useEffect(() => {
 
154
  .then((data) => {
155
  if (cancelled) return;
156
  if (data?.model) {
157
+ const model = findModelByPath(data.model, modelOptionsRef.current);
158
  if (model) setSelectedModelId(model.id);
159
  }
160
  })
 
162
  return () => { cancelled = true; };
163
  }, [sessionId]);
164
 
165
+ const selectedModel = modelOptions.find(m => m.id === selectedModelId) || modelOptions[0];
166
 
167
  // Auto-focus the textarea when the session becomes ready
168
  useEffect(() => {
 
179
  }
180
  }, [input, disabled, onSend]);
181
 
182
+ // When the chat transport reports a premium-model quota 429, restore the typed
183
  // text so the user doesn't lose their message.
184
  useEffect(() => {
185
  if (claudeQuotaExhausted && lastSentRef.current) {
 
230
  }, [setClaudeQuotaExhausted]);
231
 
232
  // "Use a free model" — switch the current session to Kimi (or the first
233
+ // non-premium option) and auto-retry the send that tripped the cap.
234
  const handleUseFreeModel = useCallback(async () => {
235
  setClaudeQuotaExhausted(false);
236
  if (!sessionId) return;
237
+ const free = modelOptions.find(m => m.modelPath === FIRST_FREE_MODEL_PATH)
238
+ ?? firstFreeModel(modelOptions);
239
  try {
240
  const res = await apiFetch(`/api/session/${sessionId}/model`, {
241
  method: 'POST',
 
251
  }
252
  }
253
  } catch { /* ignore */ }
254
+ }, [sessionId, onSend, setClaudeQuotaExhausted, modelOptions]);
255
 
256
+ const handlePremiumUpgradeClick = useCallback(async () => {
257
  if (!sessionId) return;
258
  try {
259
  await apiFetch(`/api/pro-click/${sessionId}`, {
260
  method: 'POST',
261
+ body: JSON.stringify({ source: 'premium_cap_dialog', target: 'pro_pricing' }),
262
  });
263
  } catch {
264
  /* tracking is best-effort */
 
306
  return () => document.removeEventListener('visibilitychange', onVisible);
307
  }, [awaitingTopUp, jobsUpgradeRequired, handleJobsRetry]);
308
 
309
+ // Hide the chip until the user has actually burned quota; opening a
310
+ // premium-model session without sending should not populate a counter.
311
+ const premiumChip = (() => {
312
+ if (!quota || quota.premiumUsedToday === 0) return null;
313
  if (quota.plan === 'free') {
314
+ return quota.premiumRemaining > 0 ? 'Free today' : 'Pro only';
315
  }
316
+ return `${quota.premiumUsedToday}/${quota.premiumDailyCap} today`;
317
  })();
318
 
319
  return (
 
478
  }
479
  }}
480
  >
481
+ {modelOptions.map((model) => (
482
  <MenuItem
483
  key={model.id}
484
  onClick={() => handleSelectModel(model)}
 
514
  }}
515
  />
516
  )}
517
+ {isPremiumModel(model) && premiumChip && (
518
  <Chip
519
+ label={premiumChip}
520
  size="small"
521
  sx={{
522
  height: '18px',
 
541
  <ClaudeCapDialog
542
  open={claudeQuotaExhausted}
543
  plan={quota?.plan ?? 'free'}
544
+ cap={quota?.premiumDailyCap ?? 1}
545
  onClose={handleCapDialogClose}
546
  onUseFreeModel={handleUseFreeModel}
547
+ onUpgrade={handlePremiumUpgradeClick}
548
  />
549
  <JobsUpgradeDialog
550
  open={!!jobsUpgradeRequired}
frontend/src/components/ClaudeCapDialog.tsx CHANGED
@@ -55,15 +55,15 @@ export default function ClaudeCapDialog({
55
  <DialogTitle
56
  sx={{ color: 'var(--text)', fontWeight: 700, fontSize: '1rem', pt: 2.5, pb: 0, px: 3 }}
57
  >
58
- You've hit your Opus limit
59
  </DialogTitle>
60
  <DialogContent sx={{ px: 3, pt: 1.25, pb: 0 }}>
61
  <DialogContentText
62
  sx={{ color: 'var(--muted-text)', fontSize: '0.85rem', lineHeight: 1.6 }}
63
  >
64
- Opus costs an arm and a leg, so we unfortunately have to cap you at {cap}{' '}
65
- {cap === 1 ? 'session' : 'sessions'} a day. Give Kimi, MiniMax, or GLM a spin
66
- they are genuinely good and we use them all the time.
67
  </DialogContentText>
68
  <Box
69
  sx={{
@@ -85,14 +85,14 @@ export default function ClaudeCapDialog({
85
  letterSpacing: '0.02em',
86
  }}
87
  >
88
- HF Pro ($9/mo) — more Opus, more everything
89
  </Typography>
90
  <Typography
91
  variant="caption"
92
  sx={{ display: 'block', color: 'var(--muted-text)', fontSize: '0.78rem', lineHeight: 1.55 }}
93
  >
94
- {PRO_CAP} Opus sessions/day here, 20× HF Inference credits, ZeroGPU access,
95
- and priority on Spaces hardware.
96
  </Typography>
97
  </Box>
98
  </DialogContent>
 
55
  <DialogTitle
56
  sx={{ color: 'var(--text)', fontWeight: 700, fontSize: '1rem', pt: 2.5, pb: 0, px: 3 }}
57
  >
58
+ You've hit your premium model limit
59
  </DialogTitle>
60
  <DialogContent sx={{ px: 3, pt: 1.25, pb: 0 }}>
61
  <DialogContentText
62
  sx={{ color: 'var(--muted-text)', fontSize: '0.85rem', lineHeight: 1.6 }}
63
  >
64
+ Opus and GPT-5.5 are expensive to run, so we cap premium models at {cap}{' '}
65
+ {cap === 1 ? 'session' : 'sessions'} a day. Give Kimi, MiniMax, or GLM a spin
66
+ instead.
67
  </DialogContentText>
68
  <Box
69
  sx={{
 
85
  letterSpacing: '0.02em',
86
  }}
87
  >
88
+ HF Pro ($9/mo) — more premium model sessions
89
  </Typography>
90
  <Typography
91
  variant="caption"
92
  sx={{ display: 'block', color: 'var(--muted-text)', fontSize: '0.78rem', lineHeight: 1.55 }}
93
  >
94
+ {PRO_CAP} premium model sessions/day here, 20× HF Inference credits,
95
+ ZeroGPU access, and priority on Spaces hardware.
96
  </Typography>
97
  </Box>
98
  </DialogContent>
frontend/src/hooks/useAgentChat.ts CHANGED
@@ -346,7 +346,7 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
346
  sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
347
  onError: (error) => {
348
  updateSession(sessionId, { isProcessing: false });
349
- // Claude daily-cap: open the cap dialog instead of the generic error
350
  // banner. Transport marks the error with this sentinel.
351
  if (error.message === 'CLAUDE_QUOTA_EXHAUSTED') {
352
  if (isActiveRef.current) {
 
346
  sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
347
  onError: (error) => {
348
  updateSession(sessionId, { isProcessing: false });
349
+ // Premium-model daily cap: open the cap dialog instead of the generic error
350
  // banner. Transport marks the error with this sentinel.
351
  if (error.message === 'CLAUDE_QUOTA_EXHAUSTED') {
352
  if (isActiveRef.current) {
frontend/src/hooks/useUserQuota.ts CHANGED
@@ -1,5 +1,5 @@
1
  /**
2
- * Reads the current user's Claude daily quota + plan tier from the backend.
3
  *
4
  * Fetches once when the user becomes authenticated, and exposes a `refresh()`
5
  * that callers invoke after a successful session-create / model-switch so the
@@ -13,9 +13,9 @@ export type PlanTier = 'free' | 'pro' | 'org';
13
 
14
  export interface UserQuota {
15
  plan: PlanTier;
16
- claudeUsedToday: number;
17
- claudeDailyCap: number;
18
- claudeRemaining: number;
19
  }
20
 
21
  export function useUserQuota() {
@@ -32,9 +32,9 @@ export function useUserQuota() {
32
  const data = await res.json();
33
  setQuota({
34
  plan: (data.plan ?? 'free') as PlanTier,
35
- claudeUsedToday: data.claude_used_today ?? 0,
36
- claudeDailyCap: data.claude_daily_cap ?? 1,
37
- claudeRemaining: data.claude_remaining ?? 0,
38
  });
39
  } catch {
40
  /* backend unreachable — leave previous value */
 
1
  /**
2
+ * Reads the current user's premium-model daily quota + plan tier from the backend.
3
  *
4
  * Fetches once when the user becomes authenticated, and exposes a `refresh()`
5
  * that callers invoke after a successful session-create / model-switch so the
 
13
 
14
  export interface UserQuota {
15
  plan: PlanTier;
16
+ premiumUsedToday: number;
17
+ premiumDailyCap: number;
18
+ premiumRemaining: number;
19
  }
20
 
21
  export function useUserQuota() {
 
32
  const data = await res.json();
33
  setQuota({
34
  plan: (data.plan ?? 'free') as PlanTier,
35
+ premiumUsedToday: data.premium_used_today ?? 0,
36
+ premiumDailyCap: data.premium_daily_cap ?? 1,
37
+ premiumRemaining: data.premium_remaining ?? 0,
38
  });
39
  } catch {
40
  /* backend unreachable — leave previous value */
frontend/src/lib/sse-chat-transport.ts CHANGED
@@ -402,7 +402,7 @@ export class SSEChatTransport implements ChatTransport<UIMessage> {
402
  this.sideChannel.onSessionDead(sessionId);
403
  }
404
  if (response.status === 429) {
405
- // Claude daily-quota gate tripped. The prefix is the detection marker
406
  // for useAgentChat's onError handler, which surfaces the cap dialog
407
  // instead of a generic error banner.
408
  throw new Error('CLAUDE_QUOTA_EXHAUSTED');
 
402
  this.sideChannel.onSessionDead(sessionId);
403
  }
404
  if (response.status === 429) {
405
+ // Premium-model daily quota gate tripped. This exact message is the detection marker
406
  // for useAgentChat's onError handler, which surfaces the cap dialog
407
  // instead of a generic error banner.
408
  throw new Error('CLAUDE_QUOTA_EXHAUSTED');
frontend/src/store/agentStore.ts CHANGED
@@ -113,7 +113,7 @@ interface AgentStore {
113
  user: User | null;
114
  error: string | null;
115
  llmHealthError: LLMHealthError | null;
116
- /** Set when a Claude-send hits the daily quota ChatInput opens the cap dialog in response. */
117
  claudeQuotaExhausted: boolean;
118
  jobsUpgradeRequired: JobsUpgradeState | null;
119
 
 
113
  user: User | null;
114
  error: string | null;
115
  llmHealthError: LLMHealthError | null;
116
+ /** Set when a premium-model send hits the daily quota; ChatInput opens the cap dialog. */
117
  claudeQuotaExhausted: boolean;
118
  jobsUpgradeRequired: JobsUpgradeState | null;
119
 
frontend/src/utils/model.ts CHANGED
@@ -1,14 +1,19 @@
1
  /**
2
  * Shared model-id constants used by session-create call sites and the
3
- * ClaudeCapDialog "Use a free model" escape hatch.
4
  *
5
  * Keep in sync with MODEL_OPTIONS in components/Chat/ChatInput.tsx and
6
  * AVAILABLE_MODELS in backend/routes/agent.py.
7
  */
8
 
9
  export const CLAUDE_MODEL_PATH = 'bedrock/us.anthropic.claude-opus-4-6-v1';
 
10
  export const FIRST_FREE_MODEL_PATH = 'moonshotai/Kimi-K2.6';
11
 
12
  export function isClaudePath(modelPath: string | undefined): boolean {
13
  return !!modelPath && modelPath.includes('anthropic');
14
  }
 
 
 
 
 
1
  /**
2
  * Shared model-id constants used by session-create call sites and the
3
+ * premium-model cap dialog "Use a free model" escape hatch.
4
  *
5
  * Keep in sync with MODEL_OPTIONS in components/Chat/ChatInput.tsx and
6
  * AVAILABLE_MODELS in backend/routes/agent.py.
7
  */
8
 
9
  export const CLAUDE_MODEL_PATH = 'bedrock/us.anthropic.claude-opus-4-6-v1';
10
+ export const GPT_55_MODEL_PATH = 'openai/gpt-5.5';
11
  export const FIRST_FREE_MODEL_PATH = 'moonshotai/Kimi-K2.6';
12
 
13
/**
 * Report whether a model path refers to an Anthropic-hosted model.
 *
 * Returns false for undefined or empty paths; otherwise true when the path
 * contains the substring "anthropic".
 */
export function isClaudePath(modelPath: string | undefined): boolean {
  if (!modelPath) {
    return false;
  }
  return modelPath.includes('anthropic');
}
16
+
17
/**
 * Report whether a model path is one of the premium model ids declared in
 * this module (CLAUDE_MODEL_PATH or GPT_55_MODEL_PATH). The match is exact,
 * so any other path, including undefined, yields false.
 */
export function isPremiumPath(modelPath: string | undefined): boolean {
  switch (modelPath) {
    case CLAUDE_MODEL_PATH:
    case GPT_55_MODEL_PATH:
      return true;
    default:
      return false;
  }
}
tests/unit/test_agent_model_gating.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for gated model handling in backend/routes/agent.py."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+ from types import SimpleNamespace
6
+
7
+ import pytest
8
+ from fastapi import HTTPException
9
+
10
+ _BACKEND_DIR = Path(__file__).resolve().parent.parent.parent / "backend"
11
+ if str(_BACKEND_DIR) not in sys.path:
12
+ sys.path.insert(0, str(_BACKEND_DIR))
13
+
14
+ from routes import agent # noqa: E402
15
+
16
+
17
@pytest.fixture(autouse=True)
def _reset_quota_store():
    """Reset the quota store before and after every test in this module."""
    store = agent.user_quotas
    store._reset_for_tests()
    yield
    store._reset_for_tests()
22
+
23
+
24
def test_gated_model_predicate_includes_bedrock_claude_and_gpt55_only():
    """`_is_gated_model` accepts the two gated ids and rejects the others."""
    gated_ids = ("bedrock/us.anthropic.claude-opus-4-6-v1", "openai/gpt-5.5")
    ungated_ids = ("anthropic/claude-opus-4-6", "moonshotai/Kimi-K2.6")

    for model_id in gated_ids:
        assert agent._is_gated_model(model_id)
    for model_id in ungated_ids:
        assert not agent._is_gated_model(model_id)
29
+
30
+
31
@pytest.mark.asyncio
async def test_gated_model_gate_rejects_gpt55_for_non_hf_user(monkeypatch):
    """GPT-5.5 is refused with a 403 when the org-membership check is False."""

    async def fake_require_hf_org_member(_request):
        return False

    monkeypatch.setattr(
        agent,
        "require_huggingface_org_member",
        fake_require_hf_org_member,
    )

    with pytest.raises(HTTPException) as raised:
        await agent._require_hf_for_gated_model(None, "openai/gpt-5.5")

    rejection = raised.value
    assert rejection.status_code == 403
    assert rejection.detail["error"] == "premium_model_restricted"
43
+
44
+
45
@pytest.mark.asyncio
async def test_ungated_models_skip_hf_membership_check(monkeypatch):
    """Ungated model ids pass the gate without the membership check running."""

    async def fail_if_called(_request):
        raise AssertionError("ungated models must not require HF org membership")

    monkeypatch.setattr(agent, "require_huggingface_org_member", fail_if_called)

    # The patched check raises if it is ever awaited, so simply completing
    # these calls shows it was never consulted.
    for model_id in ("moonshotai/Kimi-K2.6", "anthropic/claude-opus-4-6"):
        await agent._require_hf_for_gated_model(None, model_id)
54
+
55
+
56
@pytest.mark.asyncio
async def test_gated_quota_charges_gpt55(monkeypatch):
    """A GPT-5.5 session is counted once, flagged, and persisted."""
    snapshots = []

    async def fake_persist_session_snapshot(agent_session):
        snapshots.append(agent_session)

    monkeypatch.setattr(
        agent.session_manager,
        "persist_session_snapshot",
        fake_persist_session_snapshot,
    )

    # Minimal stand-in exposing only the attributes this test sets and checks.
    model_config = SimpleNamespace(model_name="openai/gpt-5.5")
    agent_session = SimpleNamespace(
        claude_counted=False,
        session=SimpleNamespace(config=model_config),
    )
    user = {"user_id": "u1", "plan": "free"}

    await agent._enforce_gated_model_quota(user, agent_session)

    assert agent_session.claude_counted is True
    assert snapshots == [agent_session]
    assert await agent.user_quotas.get_claude_used_today("u1") == 1
84
+
85
+
86
@pytest.mark.asyncio
async def test_gated_quota_skips_direct_anthropic(monkeypatch):
    """A direct-Anthropic session is neither counted nor persisted."""

    async def fail_if_persisted(_agent_session):
        raise AssertionError("direct Anthropic should not consume deployed gated quota")

    monkeypatch.setattr(
        agent.session_manager,
        "persist_session_snapshot",
        fail_if_persisted,
    )

    # Minimal stand-in exposing only the attributes this test sets and checks.
    model_config = SimpleNamespace(model_name="anthropic/claude-opus-4-6")
    agent_session = SimpleNamespace(
        claude_counted=False,
        session=SimpleNamespace(config=model_config),
    )
    user = {"user_id": "u1", "plan": "free"}

    await agent._enforce_gated_model_quota(user, agent_session)

    assert agent_session.claude_counted is False
    assert await agent.user_quotas.get_claude_used_today("u1") == 0
111
+
112
+
113
@pytest.mark.asyncio
async def test_user_quota_response_uses_premium_fields_only(monkeypatch):
    """The quota payload is exactly the plan plus the three `premium_*` keys."""

    async def fake_get_used_today(user_id):
        assert user_id == "u1"
        return 2

    def fake_daily_cap_for(plan):
        return 5

    quotas = agent.user_quotas
    monkeypatch.setattr(quotas, "get_claude_used_today", fake_get_used_today)
    monkeypatch.setattr(quotas, "daily_cap_for", fake_daily_cap_for)

    response = await agent.get_user_quota({"user_id": "u1", "plan": "pro"})

    # Dict equality is exact, so any extra key in the response fails here.
    expected = {
        "plan": "pro",
        "premium_used_today": 2,
        "premium_daily_cap": 5,
        "premium_remaining": 3,
    }
    assert response == expected