lewtun HF Staff OpenAI Codex committed on
Commit
d7637ba
·
unverified ·
1 Parent(s): 1e1fe1f

Fix Pro entitlement checks (#239)

Browse files

* Fix Pro entitlement checks

Normalize HF Pro and paid org plan detection, allow premium model selection with quota enforcement, and clarify HF Jobs credit billing copy.

Co-authored-by: OpenAI Codex <codex@openai.com>

* Limit premium quota to personal Pro

Ignore org membership for premium-model quota so users need personal HF Pro for the higher cap, while dev mode uses the Pro tier for local testing.

Co-authored-by: OpenAI Codex <codex@openai.com>

* Scope model switch errors to model picker

Show model switch failures through local ChatInput state instead of rendering the shared agent error field globally.

Co-authored-by: OpenAI Codex <codex@openai.com>

* Restrict premium quota detection to Pro accounts

Co-authored-by: OpenAI Codex <codex@openai.com>

* Use boolean Pro signal for premium quota

Co-authored-by: OpenAI Codex <codex@openai.com>

* Use documented isPro field for quota tier

Co-authored-by: OpenAI Codex <codex@openai.com>

* Remove dead global chat error state

Co-authored-by: OpenAI Codex <codex@openai.com>

---------

Co-authored-by: OpenAI Codex <codex@openai.com>

agent/tools/jobs_tool.py CHANGED
@@ -631,10 +631,11 @@ class HfJobsTool:
631
  "formatted": (
632
  f"Hugging Face Jobs rejected this run because the "
633
  f"namespace `{self.namespace}` has no available credits. "
634
- "Tell the user to add credits at "
635
- "https://huggingface.co/settings/billing once topped up, "
636
- "re-run this same job. (Switching namespaces is fine if "
637
- "another wallet has credits.)"
 
638
  ),
639
  "totalResults": 0,
640
  "resultsShared": 0,
 
631
  "formatted": (
632
  f"Hugging Face Jobs rejected this run because the "
633
  f"namespace `{self.namespace}` has no available credits. "
634
+ "HF Jobs are billed with namespace credits, which are "
635
+ "separate from HF Pro membership. Tell the user to add "
636
+ "credits at https://huggingface.co/settings/billing "
637
+ "once topped up, re-run this same job. (Switching "
638
+ "namespaces is fine if another wallet has credits.)"
639
  ),
640
  "totalResults": 0,
641
  "resultsShared": 0,
backend/dependencies.py CHANGED
@@ -35,7 +35,7 @@ DEV_USER: dict[str, Any] = {
35
  "user_id": "dev",
36
  "username": "dev",
37
  "authenticated": True,
38
- "plan": "org", # Dev runs at the Pro/Org quota tier so local testing isn't capped.
39
  }
40
 
41
  INTERNAL_HF_TOKEN_KEY = "_hf_token"
@@ -53,8 +53,8 @@ REQUIRED_OAUTH_SCOPES: tuple[str, ...] = (
53
  "write-discussions",
54
  )
55
 
56
- # Plan field discovery — log the whoami-v2 shape once at DEBUG so we can
57
- # confirm the actual key in production without hammering the HF API.
58
  _WHOAMI_SHAPE_LOGGED = False
59
 
60
 
@@ -136,10 +136,21 @@ def _user_from_info(user_info: dict[str, Any]) -> dict[str, Any]:
136
  }
137
 
138
 
 
 
 
 
 
 
 
 
 
 
 
139
  async def _fetch_user_plan(token: str) -> str:
140
  """Look up the user's HF plan via /api/whoami-v2.
141
 
142
- Returns 'free' | 'pro' | 'org'. Non-200, network errors, or an unknown
143
  payload shape all collapse to 'free' — safe default; we'd rather under-
144
  grant the Pro cap than over-grant it on bad data.
145
  """
@@ -151,35 +162,14 @@ async def _fetch_user_plan(token: str) -> str:
151
  if not _WHOAMI_SHAPE_LOGGED:
152
  _WHOAMI_SHAPE_LOGGED = True
153
  logger.debug(
154
- "whoami-v2 payload keys: %s (sample values: plan=%r type=%r isPro=%r)",
155
  sorted(whoami.keys())
156
  if isinstance(whoami, dict)
157
  else type(whoami).__name__,
158
- whoami.get("plan") if isinstance(whoami, dict) else None,
159
- whoami.get("type") if isinstance(whoami, dict) else None,
160
  whoami.get("isPro") if isinstance(whoami, dict) else None,
161
  )
162
 
163
- if not isinstance(whoami, dict):
164
- return "free"
165
-
166
- # OAuth whoami sets `type: "user"` and surfaces Pro via the `isPro` boolean
167
- # — see Space discussion #21. HF-Jobs eligibility (PR #172) ignores plan
168
- # entirely; the premium-model daily-cap tier is still a free vs pro/org split.
169
- if whoami.get("isPro") is True or whoami.get("is_pro") is True:
170
- return "pro"
171
- plan_str = ""
172
- for key in ("plan", "type", "accountType"):
173
- value = whoami.get(key)
174
- if isinstance(value, str) and value:
175
- plan_str = value.lower()
176
- break
177
- if any(tag in plan_str for tag in ("pro", "enterprise", "team")):
178
- return "pro"
179
- orgs = whoami.get("orgs") or []
180
- if isinstance(orgs, list) and orgs:
181
- return "org"
182
- return "free"
183
 
184
 
185
  async def _extract_user_from_token(token: str) -> dict[str, Any] | None:
 
35
  "user_id": "dev",
36
  "username": "dev",
37
  "authenticated": True,
38
+ "plan": "pro", # Dev runs at the Pro quota tier so local testing isn't capped.
39
  }
40
 
41
  INTERNAL_HF_TOKEN_KEY = "_hf_token"
 
53
  "write-discussions",
54
  )
55
 
56
+ # Log the whoami-v2 shape once at DEBUG so we can confirm the production Pro
57
+ # signal without hammering the HF API.
58
  _WHOAMI_SHAPE_LOGGED = False
59
 
60
 
 
136
  }
137
 
138
 
139
+ def _normalize_user_plan(whoami: Any) -> str:
140
+ """Normalize a whoami-v2 payload to the app's personal quota tiers."""
141
+ if not isinstance(whoami, dict):
142
+ return "free"
143
+
144
+ if whoami.get("isPro") is True:
145
+ return "pro"
146
+
147
+ return "free"
148
+
149
+
150
  async def _fetch_user_plan(token: str) -> str:
151
  """Look up the user's HF plan via /api/whoami-v2.
152
 
153
+ Returns 'free' | 'pro'. Non-200, network errors, or an unknown
154
  payload shape all collapse to 'free' — safe default; we'd rather under-
155
  grant the Pro cap than over-grant it on bad data.
156
  """
 
162
  if not _WHOAMI_SHAPE_LOGGED:
163
  _WHOAMI_SHAPE_LOGGED = True
164
  logger.debug(
165
+ "whoami-v2 payload keys: %s (sample values: isPro=%r)",
166
  sorted(whoami.keys())
167
  if isinstance(whoami, dict)
168
  else type(whoami).__name__,
 
 
169
  whoami.get("isPro") if isinstance(whoami, dict) else None,
170
  )
171
 
172
+ return _normalize_user_plan(whoami)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
 
175
  async def _extract_user_from_token(token: str) -> dict[str, Any] | None:
backend/routes/agent.py CHANGED
@@ -12,7 +12,6 @@ from typing import Any
12
  from dependencies import (
13
  INTERNAL_HF_TOKEN_KEY,
14
  get_current_user,
15
- require_huggingface_org_member,
16
  )
17
  from fastapi import (
18
  APIRouter,
@@ -55,7 +54,7 @@ _background_teardown_tasks: set[asyncio.Task] = set()
55
 
56
  DEFAULT_CLAUDE_MODEL_ID = "bedrock/us.anthropic.claude-opus-4-6-v1"
57
  DEFAULT_FREE_MODEL_ID = "moonshotai/Kimi-K2.6"
58
- GATED_MODEL_IDS = {
59
  DEFAULT_CLAUDE_MODEL_ID,
60
  "openai/gpt-5.5",
61
  }
@@ -120,35 +119,8 @@ def _available_models() -> list[dict[str, Any]]:
120
  AVAILABLE_MODELS = _available_models()
121
 
122
 
123
- def _is_gated_model(model_id: str) -> bool:
124
- return model_id in GATED_MODEL_IDS
125
-
126
-
127
- def _premium_model_restricted_error() -> HTTPException:
128
- return HTTPException(
129
- status_code=403,
130
- detail={
131
- "error": "premium_model_restricted",
132
- "message": (
133
- "Premium models are gated to HF staff. Pick a free model — "
134
- "Kimi K2.6, MiniMax M2.7, GLM 5.1, or DeepSeek V4 Pro — "
135
- "instead."
136
- ),
137
- },
138
- )
139
-
140
-
141
- async def _require_hf_for_gated_model(request: Request, model_id: str) -> None:
142
- """403 if a non-``huggingface``-org user tries to select a gated model.
143
-
144
- Gated models are deployed paid endpoints backed by service-owned
145
- credentials. The gate only fires for deployed paid models so non-HF users
146
- can still freely switch between the free models.
147
- """
148
- if not _is_gated_model(model_id):
149
- return
150
- if not await require_huggingface_org_member(request):
151
- raise _premium_model_restricted_error()
152
 
153
 
154
  async def _model_override_for_new_session(
@@ -157,21 +129,19 @@ async def _model_override_for_new_session(
157
  ) -> str | None:
158
  """Return the model override to use when creating a new session.
159
 
160
- Explicit gated-model requests keep the hard membership gate. Implicit
161
- default sessions are more forgiving: when the configured default is gated
162
- and the user lacks access, start them on the first free model instead of
163
- blocking session creation.
164
  """
165
  resolved_model = requested_model or session_manager.config.model_name
166
- if not _is_gated_model(resolved_model):
167
- return requested_model
168
- if await require_huggingface_org_member(request):
169
  return requested_model
170
  if requested_model:
171
- raise _premium_model_restricted_error()
172
 
173
  logger.info(
174
- "Default gated model %s is unavailable to this user; "
175
  "creating session with free fallback %s",
176
  resolved_model,
177
  DEFAULT_FREE_MODEL_ID,
@@ -179,40 +149,48 @@ async def _model_override_for_new_session(
179
  return DEFAULT_FREE_MODEL_ID
180
 
181
 
182
- async def _enforce_gated_model_quota(
183
  user: dict[str, Any],
184
  agent_session: AgentSession,
185
  ) -> None:
186
- """Charge the user's daily gated-model quota on first use in a session.
187
 
188
  Runs at *message-submit* time, not session-create time — so spinning up a
189
- gated-model session to look around doesn't burn quota. The
190
  ``claude_counted`` flag on ``AgentSession`` guards against re-counting the
191
  same session; the stored field name is kept for persistence compatibility.
192
 
193
- No-ops when the session's current model isn't gated, or when this
194
  session has already been charged. Raises 429 when the user has hit
195
  their daily cap.
196
  """
197
  if agent_session.claude_counted:
198
  return
199
  model_name = agent_session.session.config.model_name
200
- if not _is_gated_model(model_name):
201
  return
202
  user_id = user["user_id"]
203
- cap = user_quotas.daily_cap_for(user.get("plan"))
 
204
  new_count = await user_quotas.try_increment_claude(user_id, cap)
205
  if new_count is None:
 
 
 
 
 
 
 
 
 
 
206
  raise HTTPException(
207
  status_code=429,
208
  detail={
209
  "error": "premium_model_daily_cap",
210
- "plan": user.get("plan", "free"),
211
  "cap": cap,
212
- "message": (
213
- "Daily premium model limit reached. Upgrade to HF Pro for "
214
- f"{user_quotas.CLAUDE_PRO_DAILY}/day or use a free model."
215
- ),
216
  },
217
  )
218
  agent_session.claude_counted = True
@@ -405,7 +383,7 @@ async def create_session(
405
  behalf of the user.
406
 
407
  Optional body ``{"model"?: <id>}`` selects the session's LLM; unknown
408
- ids are rejected (400). The gated-model quota runs at message-submit
409
  time, not here — spinning up a session to look around is free.
410
 
411
  Returns 503 if the server or user has reached the session limit.
@@ -426,8 +404,8 @@ async def create_session(
426
  if model and model not in valid_ids:
427
  raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
428
 
429
- # Explicit premium selections remain gated. If the implicit configured
430
- # default is unavailable, start the session on a free model instead.
431
  model = await _model_override_for_new_session(request, model)
432
 
433
  try:
@@ -458,7 +436,7 @@ async def restore_session_summary(
458
  session's context as a user-role system note.
459
 
460
  Optional ``"model"`` in the body overrides the session's LLM. The
461
- gated-model quota runs at message-submit time, not here.
462
  """
463
  messages = body.get("messages")
464
  if not isinstance(messages, list) or not messages:
@@ -524,10 +502,7 @@ async def set_session_model(
524
 
525
  Takes effect on the next LLM call in that session — other sessions
526
  (including other browser tabs) are unaffected. Model switches don't
527
- charge quota — the gated-model quota only fires at message-submit time.
528
-
529
- Switching TO a gated deployed model requires HF org membership; free-model
530
- and local-dev direct provider switches are unrestricted.
531
  """
532
  agent_session = await _check_session_access(session_id, user, request)
533
  model_id = body.get("model")
@@ -536,7 +511,6 @@ async def set_session_model(
536
  valid_ids = {m["id"] for m in AVAILABLE_MODELS}
537
  if model_id not in valid_ids:
538
  raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
539
- await _require_hf_for_gated_model(request, model_id)
540
  if not agent_session:
541
  raise HTTPException(status_code=404, detail="Session not found")
542
  await session_manager.update_session_model(session_id, model_id)
@@ -686,7 +660,7 @@ async def submit_input(
686
  body = SubmitRequest(**payload)
687
  except ValidationError as exc:
688
  raise RequestValidationError(exc.errors()) from exc
689
- await _enforce_gated_model_quota(user, agent_session)
690
  success = await session_manager.submit_user_input(body.session_id, body.text)
691
  if not success:
692
  raise HTTPException(status_code=404, detail="Session not found or inactive")
@@ -738,12 +712,12 @@ async def chat_sse(
738
  text = body.get("text")
739
  approvals = body.get("approvals")
740
 
741
- # Gate user-message sends against the daily gated-model quota. Approvals are
742
  # continuations of an in-progress turn — the session was already charged
743
  # on its first message, so we skip the gate there.
744
  if text is not None and not approvals:
745
  try:
746
- await _enforce_gated_model_quota(user, agent_session)
747
  except HTTPException:
748
  broadcaster.unsubscribe(sub_id)
749
  raise
 
12
  from dependencies import (
13
  INTERNAL_HF_TOKEN_KEY,
14
  get_current_user,
 
15
  )
16
  from fastapi import (
17
  APIRouter,
 
54
 
55
  DEFAULT_CLAUDE_MODEL_ID = "bedrock/us.anthropic.claude-opus-4-6-v1"
56
  DEFAULT_FREE_MODEL_ID = "moonshotai/Kimi-K2.6"
57
+ PREMIUM_MODEL_IDS = {
58
  DEFAULT_CLAUDE_MODEL_ID,
59
  "openai/gpt-5.5",
60
  }
 
119
  AVAILABLE_MODELS = _available_models()
120
 
121
 
122
+ def _is_premium_model(model_id: str) -> bool:
123
+ return model_id in PREMIUM_MODEL_IDS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
 
126
  async def _model_override_for_new_session(
 
129
  ) -> str | None:
130
  """Return the model override to use when creating a new session.
131
 
132
+ Explicit premium model requests are allowed and charged at message-submit
133
+ time. Implicit default sessions are more forgiving: when the configured
134
+ default is premium, start them on the first free model instead of spending
135
+ premium quota accidentally.
136
  """
137
  resolved_model = requested_model or session_manager.config.model_name
138
+ if not _is_premium_model(resolved_model):
 
 
139
  return requested_model
140
  if requested_model:
141
+ return requested_model
142
 
143
  logger.info(
144
+ "Default premium model %s would spend quota; "
145
  "creating session with free fallback %s",
146
  resolved_model,
147
  DEFAULT_FREE_MODEL_ID,
 
149
  return DEFAULT_FREE_MODEL_ID
150
 
151
 
152
+ async def _enforce_premium_model_quota(
153
  user: dict[str, Any],
154
  agent_session: AgentSession,
155
  ) -> None:
156
+ """Charge the user's daily premium-model quota on first use in a session.
157
 
158
  Runs at *message-submit* time, not session-create time — so spinning up a
159
+ premium-model session to look around doesn't burn quota. The
160
  ``claude_counted`` flag on ``AgentSession`` guards against re-counting the
161
  same session; the stored field name is kept for persistence compatibility.
162
 
163
+ No-ops when the session's current model isn't premium, or when this
164
  session has already been charged. Raises 429 when the user has hit
165
  their daily cap.
166
  """
167
  if agent_session.claude_counted:
168
  return
169
  model_name = agent_session.session.config.model_name
170
+ if not _is_premium_model(model_name):
171
  return
172
  user_id = user["user_id"]
173
+ plan = user.get("plan", "free")
174
+ cap = user_quotas.daily_cap_for(plan)
175
  new_count = await user_quotas.try_increment_claude(user_id, cap)
176
  if new_count is None:
177
+ if plan == "pro":
178
+ message = (
179
+ "Daily premium model limit reached. Use a free model and try "
180
+ "premium models again tomorrow."
181
+ )
182
+ else:
183
+ message = (
184
+ "Daily premium model limit reached. Upgrade to HF Pro for "
185
+ f"{user_quotas.CLAUDE_PRO_DAILY}/day or use a free model."
186
+ )
187
  raise HTTPException(
188
  status_code=429,
189
  detail={
190
  "error": "premium_model_daily_cap",
191
+ "plan": plan,
192
  "cap": cap,
193
+ "message": message,
 
 
 
194
  },
195
  )
196
  agent_session.claude_counted = True
 
383
  behalf of the user.
384
 
385
  Optional body ``{"model"?: <id>}`` selects the session's LLM; unknown
386
+ ids are rejected (400). The premium-model quota runs at message-submit
387
  time, not here — spinning up a session to look around is free.
388
 
389
  Returns 503 if the server or user has reached the session limit.
 
404
  if model and model not in valid_ids:
405
  raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
406
 
407
+ # Explicit premium selections are allowed. If the implicit configured
408
+ # default is premium, start the session on a free model instead.
409
  model = await _model_override_for_new_session(request, model)
410
 
411
  try:
 
436
  session's context as a user-role system note.
437
 
438
  Optional ``"model"`` in the body overrides the session's LLM. The
439
+ premium-model quota runs at message-submit time, not here.
440
  """
441
  messages = body.get("messages")
442
  if not isinstance(messages, list) or not messages:
 
502
 
503
  Takes effect on the next LLM call in that session — other sessions
504
  (including other browser tabs) are unaffected. Model switches don't
505
+ charge quota — the premium-model quota only fires at message-submit time.
 
 
 
506
  """
507
  agent_session = await _check_session_access(session_id, user, request)
508
  model_id = body.get("model")
 
511
  valid_ids = {m["id"] for m in AVAILABLE_MODELS}
512
  if model_id not in valid_ids:
513
  raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
 
514
  if not agent_session:
515
  raise HTTPException(status_code=404, detail="Session not found")
516
  await session_manager.update_session_model(session_id, model_id)
 
660
  body = SubmitRequest(**payload)
661
  except ValidationError as exc:
662
  raise RequestValidationError(exc.errors()) from exc
663
+ await _enforce_premium_model_quota(user, agent_session)
664
  success = await session_manager.submit_user_input(body.session_id, body.text)
665
  if not success:
666
  raise HTTPException(status_code=404, detail="Session not found or inactive")
 
712
  text = body.get("text")
713
  approvals = body.get("approvals")
714
 
715
+ # Gate user-message sends against the daily premium-model quota. Approvals are
716
  # continuations of an in-progress turn — the session was already charged
717
  # on its first message, so we skip the gate there.
718
  if text is not None and not approvals:
719
  try:
720
+ await _enforce_premium_model_quota(user, agent_session)
721
  except HTTPException:
722
  broadcaster.unsubscribe(sub_id)
723
  raise
backend/user_quotas.py CHANGED
@@ -13,7 +13,7 @@ back to a premium model doesn't (`AgentSession.claude_counted` guards that).
13
 
14
  Cap tiers:
15
  free user → CLAUDE_FREE_DAILY (1)
16
- pro / org → CLAUDE_PRO_DAILY (20)
17
  """
18
 
19
  import asyncio
@@ -40,7 +40,7 @@ def _today() -> str:
40
 
41
  def daily_cap_for(plan: str | None) -> int:
42
  """Return the daily Claude-session cap for the given plan."""
43
- return CLAUDE_FREE_DAILY if (plan or "free") == "free" else CLAUDE_PRO_DAILY
44
 
45
 
46
  async def get_claude_used_today(user_id: str) -> int:
 
13
 
14
  Cap tiers:
15
  free user → CLAUDE_FREE_DAILY (1)
16
+ pro user → CLAUDE_PRO_DAILY (20)
17
  """
18
 
19
  import asyncio
 
40
 
41
  def daily_cap_for(plan: str | None) -> int:
42
  """Return the daily Claude-session cap for the given plan."""
43
+ return CLAUDE_PRO_DAILY if plan == "pro" else CLAUDE_FREE_DAILY
44
 
45
 
46
  async def get_claude_used_today(user_id: str) -> int:
frontend/src/components/Chat/ChatInput.tsx CHANGED
@@ -1,5 +1,18 @@
1
  import { useState, useCallback, useEffect, useRef, KeyboardEvent } from 'react';
2
- import { Box, TextField, IconButton, CircularProgress, Typography, Menu, MenuItem, ListItemIcon, ListItemText, Chip } from '@mui/material';
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import ArrowUpwardIcon from '@mui/icons-material/ArrowUpward';
4
  import ArrowDropDownIcon from '@mui/icons-material/ArrowDropDown';
5
  import StopIcon from '@mui/icons-material/Stop';
@@ -87,6 +100,19 @@ const findModelByPath = (path: string, options: ModelOption[]): ModelOption | un
87
  return options.find(m => m.modelPath === path || path?.includes(m.id));
88
  };
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  interface ChatInputProps {
91
  sessionId?: string;
92
  initialModelPath?: string | null;
@@ -123,6 +149,7 @@ export default function ChatInput({ sessionId, initialModelPath, onSend, onStop,
123
  const setJobsUpgradeRequired = useAgentStore((s) => s.setJobsUpgradeRequired);
124
  const updateSessionModel = useSessionStore((s) => s.updateSessionModel);
125
  const [awaitingTopUp, setAwaitingTopUp] = useState(false);
 
126
  const lastSentRef = useRef<string>('');
127
 
128
  useEffect(() => {
@@ -240,8 +267,13 @@ export default function ChatInput({ sessionId, initialModelPath, onSend, onStop,
240
  if (res.ok) {
241
  setSelectedModelId(model.id);
242
  updateSessionModel(sessionId, model.modelPath);
 
 
243
  }
244
- } catch { /* ignore */ }
 
 
 
245
  };
246
 
247
  // Dialog close: just clear the flag. The typed text is already restored.
@@ -575,6 +607,21 @@ export default function ChatInput({ sessionId, initialModelPath, onSend, onStop,
575
  onUpgrade={handleJobsUpgradeClick}
576
  onRetry={handleJobsRetry}
577
  />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
578
  </Box>
579
  </Box>
580
  );
 
1
  import { useState, useCallback, useEffect, useRef, KeyboardEvent } from 'react';
2
+ import {
3
+ Alert,
4
+ Box,
5
+ TextField,
6
+ IconButton,
7
+ CircularProgress,
8
+ Typography,
9
+ Menu,
10
+ MenuItem,
11
+ ListItemIcon,
12
+ ListItemText,
13
+ Chip,
14
+ Snackbar,
15
+ } from '@mui/material';
16
  import ArrowUpwardIcon from '@mui/icons-material/ArrowUpward';
17
  import ArrowDropDownIcon from '@mui/icons-material/ArrowDropDown';
18
  import StopIcon from '@mui/icons-material/Stop';
 
100
  return options.find(m => m.modelPath === path || path?.includes(m.id));
101
  };
102
 
103
+ const readApiErrorMessage = async (res: Response, fallback: string): Promise<string> => {
104
+ try {
105
+ const data = await res.json();
106
+ const detail = data?.detail;
107
+ if (typeof detail === 'string') return detail;
108
+ if (detail && typeof detail.message === 'string') return detail.message;
109
+ if (detail && typeof detail.error === 'string') return detail.error;
110
+ } catch {
111
+ /* ignore malformed error bodies */
112
+ }
113
+ return fallback;
114
+ };
115
+
116
  interface ChatInputProps {
117
  sessionId?: string;
118
  initialModelPath?: string | null;
 
149
  const setJobsUpgradeRequired = useAgentStore((s) => s.setJobsUpgradeRequired);
150
  const updateSessionModel = useSessionStore((s) => s.updateSessionModel);
151
  const [awaitingTopUp, setAwaitingTopUp] = useState(false);
152
+ const [modelSwitchError, setModelSwitchError] = useState<string | null>(null);
153
  const lastSentRef = useRef<string>('');
154
 
155
  useEffect(() => {
 
267
  if (res.ok) {
268
  setSelectedModelId(model.id);
269
  updateSessionModel(sessionId, model.modelPath);
270
+ setModelSwitchError(null);
271
+ return;
272
  }
273
+ setModelSwitchError(await readApiErrorMessage(res, 'Could not switch model.'));
274
+ } catch (error) {
275
+ setModelSwitchError(error instanceof Error ? error.message : 'Could not switch model.');
276
+ }
277
  };
278
 
279
  // Dialog close: just clear the flag. The typed text is already restored.
 
607
  onUpgrade={handleJobsUpgradeClick}
608
  onRetry={handleJobsRetry}
609
  />
610
+ <Snackbar
611
+ open={!!modelSwitchError}
612
+ anchorOrigin={{ vertical: 'top', horizontal: 'center' }}
613
+ onClose={() => setModelSwitchError(null)}
614
+ autoHideDuration={6000}
615
+ >
616
+ <Alert
617
+ severity="error"
618
+ variant="filled"
619
+ onClose={() => setModelSwitchError(null)}
620
+ sx={{ fontSize: '0.8rem', maxWidth: 480 }}
621
+ >
622
+ {modelSwitchError}
623
+ </Alert>
624
+ </Snackbar>
625
  </Box>
626
  </Box>
627
  );
frontend/src/components/ClaudeCapDialog.tsx CHANGED
@@ -30,9 +30,7 @@ export default function ClaudeCapDialog({
30
  onUseFreeModel,
31
  onUpgrade,
32
  }: ClaudeCapDialogProps) {
33
- // plan not surfaced in copy right now — Pro users see the same dialog and
34
- // can upgrade their org if they're also capped.
35
- void plan;
36
 
37
  return (
38
  <Dialog
@@ -62,62 +60,68 @@ export default function ClaudeCapDialog({
62
  sx={{ color: 'var(--muted-text)', fontSize: '0.85rem', lineHeight: 1.6 }}
63
  >
64
  Opus and GPT-5.5 are expensive to run, so we cap premium models at {cap}{' '}
65
- {cap === 1 ? 'session' : 'sessions'} a day. Give Kimi, MiniMax, GLM,
66
- or DeepSeek a spin instead.
 
 
67
  </DialogContentText>
68
- <Box
69
- sx={{
70
- mt: 2,
71
- p: 1.5,
72
- borderRadius: '8px',
73
- bgcolor: 'var(--accent-yellow-weak)',
74
- border: '1px solid var(--border)',
75
- }}
76
- >
77
- <Typography
78
- variant="caption"
79
  sx={{
80
- display: 'block',
81
- fontWeight: 700,
82
- color: 'var(--text)',
83
- fontSize: '0.78rem',
84
- mb: 0.5,
85
- letterSpacing: '0.02em',
86
  }}
87
  >
88
- HF Pro ($9/mo) — more premium model sessions
89
- </Typography>
90
- <Typography
91
- variant="caption"
92
- sx={{ display: 'block', color: 'var(--muted-text)', fontSize: '0.78rem', lineHeight: 1.55 }}
93
- >
94
- {PRO_CAP} premium model sessions/day here, 20× HF Inference credits,
95
- ZeroGPU access, and priority on Spaces hardware.
96
- </Typography>
97
- </Box>
 
 
 
 
 
 
 
 
 
 
 
 
98
  </DialogContent>
99
  <DialogActions sx={{ px: 3, pb: 2.5, pt: 2, gap: 1 }}>
100
- <Button
101
- component="a"
102
- href={HF_PRICING_URL}
103
- target="_blank"
104
- rel="noopener noreferrer"
105
- onClick={onUpgrade}
106
- variant="contained"
107
- size="small"
108
- sx={{
109
- fontSize: '0.82rem',
110
- px: 2.5,
111
- bgcolor: 'var(--accent-yellow)',
112
- color: '#000',
113
- textTransform: 'none',
114
- fontWeight: 700,
115
- boxShadow: 'none',
116
- '&:hover': { bgcolor: '#FFB340', boxShadow: 'none' },
117
- }}
118
- >
119
- Upgrade to Pro
120
- </Button>
 
 
121
  <Button
122
  onClick={onUseFreeModel}
123
  size="small"
 
30
  onUseFreeModel,
31
  onUpgrade,
32
  }: ClaudeCapDialogProps) {
33
+ const isFreePlan = plan === 'free';
 
 
34
 
35
  return (
36
  <Dialog
 
60
  sx={{ color: 'var(--muted-text)', fontSize: '0.85rem', lineHeight: 1.6 }}
61
  >
62
  Opus and GPT-5.5 are expensive to run, so we cap premium models at {cap}{' '}
63
+ {cap === 1 ? 'session' : 'sessions'} a day. {isFreePlan
64
+ ? 'HF Pro raises the daily premium-model limit.'
65
+ : 'Your plan has used today’s premium-model allowance.'}{' '}
66
+ Give Kimi, MiniMax, GLM, or DeepSeek a spin instead.
67
  </DialogContentText>
68
+ {isFreePlan && (
69
+ <Box
 
 
 
 
 
 
 
 
 
70
  sx={{
71
+ mt: 2,
72
+ p: 1.5,
73
+ borderRadius: '8px',
74
+ bgcolor: 'var(--accent-yellow-weak)',
75
+ border: '1px solid var(--border)',
 
76
  }}
77
  >
78
+ <Typography
79
+ variant="caption"
80
+ sx={{
81
+ display: 'block',
82
+ fontWeight: 700,
83
+ color: 'var(--text)',
84
+ fontSize: '0.78rem',
85
+ mb: 0.5,
86
+ letterSpacing: '0.02em',
87
+ }}
88
+ >
89
+ HF Pro ($9/mo) — more premium model sessions
90
+ </Typography>
91
+ <Typography
92
+ variant="caption"
93
+ sx={{ display: 'block', color: 'var(--muted-text)', fontSize: '0.78rem', lineHeight: 1.55 }}
94
+ >
95
+ {PRO_CAP} premium model sessions/day here, 20× HF Inference credits,
96
+ ZeroGPU access, and priority on Spaces hardware.
97
+ </Typography>
98
+ </Box>
99
+ )}
100
  </DialogContent>
101
  <DialogActions sx={{ px: 3, pb: 2.5, pt: 2, gap: 1 }}>
102
+ {isFreePlan && (
103
+ <Button
104
+ component="a"
105
+ href={HF_PRICING_URL}
106
+ target="_blank"
107
+ rel="noopener noreferrer"
108
+ onClick={onUpgrade}
109
+ variant="contained"
110
+ size="small"
111
+ sx={{
112
+ fontSize: '0.82rem',
113
+ px: 2.5,
114
+ bgcolor: 'var(--accent-yellow)',
115
+ color: '#000',
116
+ textTransform: 'none',
117
+ fontWeight: 700,
118
+ boxShadow: 'none',
119
+ '&:hover': { bgcolor: '#FFB340', boxShadow: 'none' },
120
+ }}
121
+ >
122
+ Upgrade to Pro
123
+ </Button>
124
+ )}
125
  <Button
126
  onClick={onUseFreeModel}
127
  size="small"
frontend/src/components/JobsUpgradeDialog.tsx CHANGED
@@ -148,7 +148,7 @@ export default function JobsUpgradeDialog({
148
  {awaitingTopUp
149
  ? 'Once your top-up is through, click below to resume — the agent will pick the run back up where it left off.'
150
  : message ||
151
- 'Hugging Face Jobs need credits on the namespace running them. Add some, then resume the agent waits here in the meantime.'}
152
  </Typography>
153
 
154
  <Box
 
148
  {awaitingTopUp
149
  ? 'Once your top-up is through, click below to resume — the agent will pick the run back up where it left off.'
150
  : message ||
151
+ 'Hugging Face Jobs need credits on the namespace running them. Job credits are separate from HF Pro membership. Add some, then resume.'}
152
  </Typography>
153
 
154
  <Box
frontend/src/hooks/useAgentChat.ts CHANGED
@@ -60,9 +60,6 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
60
  },
61
  onError: (error: string) => {
62
  updateSession(sessionId, { isProcessing: false });
63
- if (isActiveRef.current) {
64
- useAgentStore.getState().setError(error);
65
- }
66
  callbacksRef.current.onError?.(error);
67
  },
68
  onProcessing: () => {
@@ -369,9 +366,6 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
369
  return;
370
  }
371
  logger.error('useChat error:', error);
372
- if (isActiveRef.current) {
373
- useAgentStore.getState().setError(error.message);
374
- }
375
  },
376
  });
377
 
 
60
  },
61
  onError: (error: string) => {
62
  updateSession(sessionId, { isProcessing: false });
 
 
 
63
  callbacksRef.current.onError?.(error);
64
  },
65
  onProcessing: () => {
 
366
  return;
367
  }
368
  logger.error('useChat error:', error);
 
 
 
369
  },
370
  });
371
 
frontend/src/hooks/useUserQuota.ts CHANGED
@@ -9,7 +9,7 @@ import { useCallback, useEffect, useState } from 'react';
9
  import { useAgentStore } from '@/store/agentStore';
10
  import { apiFetch } from '@/utils/api';
11
 
12
- export type PlanTier = 'free' | 'pro' | 'org';
13
 
14
  export interface UserQuota {
15
  plan: PlanTier;
 
9
  import { useAgentStore } from '@/store/agentStore';
10
  import { apiFetch } from '@/utils/api';
11
 
12
+ export type PlanTier = 'free' | 'pro';
13
 
14
  export interface UserQuota {
15
  plan: PlanTier;
frontend/src/lib/sse-chat-transport.ts CHANGED
@@ -294,8 +294,8 @@ function createEventToChunkStream(sideChannel: SideChannelCallbacks): TransformS
294
  useAgentStore.getState().setJobsUpgradeRequired({
295
  namespace: namespace || null,
296
  message: namespace
297
- ? `Hugging Face Jobs need credits on the "${namespace}" namespace. Add some, then re-run the same job the agent will pick it back up.`
298
- : 'Hugging Face Jobs need credits on this namespace. Add some, then re-run the same job — the agent will pick it back up.',
299
  });
300
  }
301
  break;
 
294
  useAgentStore.getState().setJobsUpgradeRequired({
295
  namespace: namespace || null,
296
  message: namespace
297
+ ? `Hugging Face Jobs need credits on the "${namespace}" namespace. Job credits are separate from HF Pro membership; add credits, then re-run the same job.`
298
+ : 'Hugging Face Jobs need namespace credits, which are separate from HF Pro membership. Add credits, then re-run the same job.',
299
  });
300
  }
301
  break;
frontend/src/store/agentStore.ts CHANGED
@@ -6,7 +6,7 @@
6
  * - Connection / processing flags
7
  * - Panel state (right panel — single-artifact pattern)
8
  * - Plan state
9
- * - User info / error banners
10
  * - Edited scripts (for hf_jobs code editing)
11
  *
12
  * Per-session state:
@@ -117,7 +117,6 @@ interface AgentStore {
117
  isConnected: boolean;
118
  activityStatus: ActivityStatus;
119
  user: User | null;
120
- error: string | null;
121
  llmHealthError: LLMHealthError | null;
122
  /** Set when a premium-model send hits the daily quota; ChatInput opens the cap dialog. */
123
  claudeQuotaExhausted: boolean;
@@ -173,7 +172,6 @@ interface AgentStore {
173
  setConnected: (isConnected: boolean) => void;
174
  setActivityStatus: (status: ActivityStatus) => void;
175
  setUser: (user: User | null) => void;
176
- setError: (error: string | null) => void;
177
  setLlmHealthError: (error: LLMHealthError | null) => void;
178
  setClaudeQuotaExhausted: (exhausted: boolean) => void;
179
  setJobsUpgradeRequired: (state: JobsUpgradeState | null) => void;
@@ -295,7 +293,6 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
295
  isConnected: false,
296
  activityStatus: { type: 'idle' },
297
  user: null,
298
- error: null,
299
  llmHealthError: null,
300
  claudeQuotaExhausted: false,
301
  jobsUpgradeRequired: null,
@@ -335,7 +332,7 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
335
  // (plus activityStatus when the processing→idle side-effect fires).
336
  // This prevents overwriting flat fields changed by global setters
337
  // (e.g. setPanelView called from CodePanel) with stale snapshot values.
338
- let flatMirror: Record<string, unknown> = {};
339
  if (isActive) {
340
  for (const key of Object.keys(updates)) {
341
  flatMirror[key] = updated[key as keyof PerSessionState];
@@ -388,14 +385,13 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
388
  panelView: incoming.panelView,
389
  panelEditable: incoming.panelEditable,
390
  plan: incoming.plan,
391
- // Clear transient error on switch
392
- error: null,
393
  });
394
  },
395
 
396
  clearSessionState: (sessionId) => {
397
  set((state) => {
398
- const { [sessionId]: _, ...rest } = state.sessionStates;
 
399
  return { sessionStates: rest };
400
  });
401
  },
@@ -410,7 +406,6 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
410
  setConnected: (isConnected) => set({ isConnected }),
411
  setActivityStatus: (status) => set({ activityStatus: status }),
412
  setUser: (user) => set({ user }),
413
- setError: (error) => set({ error }),
414
  setLlmHealthError: (error) => set({ llmHealthError: error }),
415
  setClaudeQuotaExhausted: (exhausted) => set({ claudeQuotaExhausted: exhausted }),
416
  setJobsUpgradeRequired: (state) => set({ jobsUpgradeRequired: state }),
 
6
  * - Connection / processing flags
7
  * - Panel state (right panel — single-artifact pattern)
8
  * - Plan state
9
+ * - User info / health and quota banners
10
  * - Edited scripts (for hf_jobs code editing)
11
  *
12
  * Per-session state:
 
117
  isConnected: boolean;
118
  activityStatus: ActivityStatus;
119
  user: User | null;
 
120
  llmHealthError: LLMHealthError | null;
121
  /** Set when a premium-model send hits the daily quota; ChatInput opens the cap dialog. */
122
  claudeQuotaExhausted: boolean;
 
172
  setConnected: (isConnected: boolean) => void;
173
  setActivityStatus: (status: ActivityStatus) => void;
174
  setUser: (user: User | null) => void;
 
175
  setLlmHealthError: (error: LLMHealthError | null) => void;
176
  setClaudeQuotaExhausted: (exhausted: boolean) => void;
177
  setJobsUpgradeRequired: (state: JobsUpgradeState | null) => void;
 
293
  isConnected: false,
294
  activityStatus: { type: 'idle' },
295
  user: null,
 
296
  llmHealthError: null,
297
  claudeQuotaExhausted: false,
298
  jobsUpgradeRequired: null,
 
332
  // (plus activityStatus when the processing→idle side-effect fires).
333
  // This prevents overwriting flat fields changed by global setters
334
  // (e.g. setPanelView called from CodePanel) with stale snapshot values.
335
+ const flatMirror: Record<string, unknown> = {};
336
  if (isActive) {
337
  for (const key of Object.keys(updates)) {
338
  flatMirror[key] = updated[key as keyof PerSessionState];
 
385
  panelView: incoming.panelView,
386
  panelEditable: incoming.panelEditable,
387
  plan: incoming.plan,
 
 
388
  });
389
  },
390
 
391
  clearSessionState: (sessionId) => {
392
  set((state) => {
393
+ const rest = { ...state.sessionStates };
394
+ delete rest[sessionId];
395
  return { sessionStates: rest };
396
  });
397
  },
 
406
  setConnected: (isConnected) => set({ isConnected }),
407
  setActivityStatus: (status) => set({ activityStatus: status }),
408
  setUser: (user) => set({ user }),
 
409
  setLlmHealthError: (error) => set({ llmHealthError: error }),
410
  setClaudeQuotaExhausted: (exhausted) => set({ claudeQuotaExhausted: exhausted }),
411
  setJobsUpgradeRequired: (state) => set({ jobsUpgradeRequired: state }),
tests/unit/test_agent_model_gating.py CHANGED
@@ -1,4 +1,4 @@
1
- """Tests for gated model handling in backend/routes/agent.py."""
2
 
3
  import asyncio
4
  import sys
@@ -22,43 +22,15 @@ def _reset_quota_store():
22
  agent.user_quotas._reset_for_tests()
23
 
24
 
25
- def test_gated_model_predicate_includes_bedrock_claude_and_gpt55_only():
26
- assert agent._is_gated_model("bedrock/us.anthropic.claude-opus-4-6-v1")
27
- assert agent._is_gated_model("openai/gpt-5.5")
28
- assert not agent._is_gated_model("anthropic/claude-opus-4-6")
29
- assert not agent._is_gated_model("moonshotai/Kimi-K2.6")
30
 
31
 
32
  @pytest.mark.asyncio
33
- async def test_gated_model_gate_rejects_gpt55_for_non_hf_user(monkeypatch):
34
- async def fake_require_hf_org_member(_request):
35
- return False
36
-
37
- monkeypatch.setattr(
38
- agent,
39
- "require_huggingface_org_member",
40
- fake_require_hf_org_member,
41
- )
42
-
43
- with pytest.raises(HTTPException) as exc_info:
44
- await agent._require_hf_for_gated_model(None, "openai/gpt-5.5")
45
-
46
- assert exc_info.value.status_code == 403
47
- assert exc_info.value.detail["error"] == "premium_model_restricted"
48
-
49
-
50
- @pytest.mark.asyncio
51
- async def test_default_gated_session_falls_back_to_free_model_for_non_hf_user(
52
- monkeypatch,
53
- ):
54
- async def fake_require_hf_org_member(_request):
55
- return False
56
-
57
- monkeypatch.setattr(
58
- agent,
59
- "require_huggingface_org_member",
60
- fake_require_hf_org_member,
61
- )
62
  monkeypatch.setattr(
63
  agent.session_manager.config,
64
  "model_name",
@@ -71,19 +43,11 @@ async def test_default_gated_session_falls_back_to_free_model_for_non_hf_user(
71
 
72
 
73
  @pytest.mark.asyncio
74
- async def test_default_gated_session_stays_default_for_hf_user(monkeypatch):
75
- async def fake_require_hf_org_member(_request):
76
- return True
77
-
78
- monkeypatch.setattr(
79
- agent,
80
- "require_huggingface_org_member",
81
- fake_require_hf_org_member,
82
- )
83
  monkeypatch.setattr(
84
  agent.session_manager.config,
85
  "model_name",
86
- agent.DEFAULT_CLAUDE_MODEL_ID,
87
  )
88
 
89
  model = await agent._model_override_for_new_session(None, None)
@@ -92,16 +56,7 @@ async def test_default_gated_session_stays_default_for_hf_user(monkeypatch):
92
 
93
 
94
  @pytest.mark.asyncio
95
- async def test_explicit_gated_session_allowed_for_hf_user(monkeypatch):
96
- async def fake_require_hf_org_member(_request):
97
- return True
98
-
99
- monkeypatch.setattr(
100
- agent,
101
- "require_huggingface_org_member",
102
- fake_require_hf_org_member,
103
- )
104
-
105
  model = await agent._model_override_for_new_session(
106
  None,
107
  agent.DEFAULT_CLAUDE_MODEL_ID,
@@ -111,34 +66,39 @@ async def test_explicit_gated_session_allowed_for_hf_user(monkeypatch):
111
 
112
 
113
  @pytest.mark.asyncio
114
- async def test_explicit_gated_session_request_still_rejects_non_hf_user(monkeypatch):
115
- async def fake_require_hf_org_member(_request):
116
- return False
117
-
118
- monkeypatch.setattr(
119
- agent, "require_huggingface_org_member", fake_require_hf_org_member
120
- )
121
-
122
- with pytest.raises(HTTPException) as exc_info:
123
- await agent._model_override_for_new_session(None, agent.DEFAULT_CLAUDE_MODEL_ID)
124
 
125
- assert exc_info.value.status_code == 403
126
- assert exc_info.value.detail["error"] == "premium_model_restricted"
 
 
127
 
 
 
128
 
129
- @pytest.mark.asyncio
130
- async def test_ungated_models_skip_hf_membership_check(monkeypatch):
131
- async def fail_if_called(_request):
132
- raise AssertionError("ungated models must not require HF org membership")
 
 
133
 
134
- monkeypatch.setattr(agent, "require_huggingface_org_member", fail_if_called)
 
 
 
 
 
135
 
136
- await agent._require_hf_for_gated_model(None, "moonshotai/Kimi-K2.6")
137
- await agent._require_hf_for_gated_model(None, "anthropic/claude-opus-4-6")
138
 
139
 
140
  @pytest.mark.asyncio
141
- async def test_gated_quota_charges_gpt55(monkeypatch):
142
  persisted = []
143
 
144
  async def fake_persist_session_snapshot(agent_session):
@@ -157,7 +117,7 @@ async def test_gated_quota_charges_gpt55(monkeypatch):
157
  ),
158
  )
159
 
160
- await agent._enforce_gated_model_quota(
161
  {"user_id": "u1", "plan": "free"},
162
  agent_session,
163
  )
@@ -168,9 +128,113 @@ async def test_gated_quota_charges_gpt55(monkeypatch):
168
 
169
 
170
  @pytest.mark.asyncio
171
- async def test_gated_quota_skips_direct_anthropic(monkeypatch):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  async def fail_if_persisted(_agent_session):
173
- raise AssertionError("direct Anthropic should not consume deployed gated quota")
174
 
175
  monkeypatch.setattr(
176
  agent.session_manager,
@@ -185,7 +249,7 @@ async def test_gated_quota_skips_direct_anthropic(monkeypatch):
185
  ),
186
  )
187
 
188
- await agent._enforce_gated_model_quota(
189
  {"user_id": "u1", "plan": "free"},
190
  agent_session,
191
  )
 
1
+ """Tests for premium model handling in backend/routes/agent.py."""
2
 
3
  import asyncio
4
  import sys
 
22
  agent.user_quotas._reset_for_tests()
23
 
24
 
25
+ def test_premium_model_predicate_includes_bedrock_claude_and_gpt55_only():
26
+ assert agent._is_premium_model("bedrock/us.anthropic.claude-opus-4-6-v1")
27
+ assert agent._is_premium_model("openai/gpt-5.5")
28
+ assert not agent._is_premium_model("anthropic/claude-opus-4-6")
29
+ assert not agent._is_premium_model("moonshotai/Kimi-K2.6")
30
 
31
 
32
  @pytest.mark.asyncio
33
+ async def test_default_premium_session_falls_back_to_free_model(monkeypatch):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  monkeypatch.setattr(
35
  agent.session_manager.config,
36
  "model_name",
 
43
 
44
 
45
  @pytest.mark.asyncio
46
+ async def test_default_free_session_keeps_config_default(monkeypatch):
 
 
 
 
 
 
 
 
47
  monkeypatch.setattr(
48
  agent.session_manager.config,
49
  "model_name",
50
+ agent.DEFAULT_FREE_MODEL_ID,
51
  )
52
 
53
  model = await agent._model_override_for_new_session(None, None)
 
56
 
57
 
58
  @pytest.mark.asyncio
59
+ async def test_explicit_premium_session_allowed_for_authenticated_user():
 
 
 
 
 
 
 
 
 
60
  model = await agent._model_override_for_new_session(
61
  None,
62
  agent.DEFAULT_CLAUDE_MODEL_ID,
 
66
 
67
 
68
  @pytest.mark.asyncio
69
+ async def test_switching_to_premium_model_is_allowed_for_authenticated_user(
70
+ monkeypatch,
71
+ ):
72
+ updated = []
 
 
 
 
 
 
73
 
74
+ async def fake_check_session_access(session_id, user, request=None):
75
+ assert session_id == "s1"
76
+ assert user["user_id"] == "u1"
77
+ return SimpleNamespace(user_id="u1")
78
 
79
+ async def fake_update_session_model(session_id, model_id):
80
+ updated.append((session_id, model_id))
81
 
82
+ monkeypatch.setattr(agent, "_check_session_access", fake_check_session_access)
83
+ monkeypatch.setattr(
84
+ agent.session_manager,
85
+ "update_session_model",
86
+ fake_update_session_model,
87
+ )
88
 
89
+ response = await agent.set_session_model(
90
+ "s1",
91
+ {"model": "openai/gpt-5.5"},
92
+ request=None,
93
+ user={"user_id": "u1", "plan": "free"},
94
+ )
95
 
96
+ assert response == {"session_id": "s1", "model": "openai/gpt-5.5"}
97
+ assert updated == [("s1", "openai/gpt-5.5")]
98
 
99
 
100
  @pytest.mark.asyncio
101
+ async def test_premium_quota_charges_gpt55(monkeypatch):
102
  persisted = []
103
 
104
  async def fake_persist_session_snapshot(agent_session):
 
117
  ),
118
  )
119
 
120
+ await agent._enforce_premium_model_quota(
121
  {"user_id": "u1", "plan": "free"},
122
  agent_session,
123
  )
 
128
 
129
 
130
  @pytest.mark.asyncio
131
+ async def test_free_user_premium_quota_rejects_second_session(monkeypatch):
132
+ async def fake_persist_session_snapshot(_agent_session):
133
+ return None
134
+
135
+ monkeypatch.setattr(
136
+ agent.session_manager,
137
+ "persist_session_snapshot",
138
+ fake_persist_session_snapshot,
139
+ )
140
+
141
+ first_session = SimpleNamespace(
142
+ claude_counted=False,
143
+ session=SimpleNamespace(
144
+ config=SimpleNamespace(model_name="openai/gpt-5.5"),
145
+ ),
146
+ )
147
+ second_session = SimpleNamespace(
148
+ claude_counted=False,
149
+ session=SimpleNamespace(
150
+ config=SimpleNamespace(model_name="openai/gpt-5.5"),
151
+ ),
152
+ )
153
+
154
+ await agent._enforce_premium_model_quota(
155
+ {"user_id": "free-user", "plan": "free"},
156
+ first_session,
157
+ )
158
+ with pytest.raises(HTTPException) as exc_info:
159
+ await agent._enforce_premium_model_quota(
160
+ {"user_id": "free-user", "plan": "free"},
161
+ second_session,
162
+ )
163
+
164
+ assert exc_info.value.status_code == 429
165
+ assert exc_info.value.detail["error"] == "premium_model_daily_cap"
166
+ assert exc_info.value.detail["plan"] == "free"
167
+
168
+
169
+ @pytest.mark.asyncio
170
+ async def test_pro_user_uses_pro_premium_quota(monkeypatch):
171
+ async def fake_persist_session_snapshot(_agent_session):
172
+ return None
173
+
174
+ monkeypatch.setattr(
175
+ agent.session_manager,
176
+ "persist_session_snapshot",
177
+ fake_persist_session_snapshot,
178
+ )
179
+
180
+ for index in range(2):
181
+ agent_session = SimpleNamespace(
182
+ claude_counted=False,
183
+ session=SimpleNamespace(
184
+ config=SimpleNamespace(model_name="openai/gpt-5.5"),
185
+ ),
186
+ )
187
+ await agent._enforce_premium_model_quota(
188
+ {"user_id": "pro-user", "plan": "pro"},
189
+ agent_session,
190
+ )
191
+ assert agent_session.claude_counted is True
192
+ assert await agent.user_quotas.get_claude_used_today("pro-user") == index + 1
193
+
194
+
195
+ @pytest.mark.asyncio
196
+ async def test_org_plan_uses_free_premium_quota(monkeypatch):
197
+ async def fake_persist_session_snapshot(_agent_session):
198
+ return None
199
+
200
+ monkeypatch.setattr(
201
+ agent.session_manager,
202
+ "persist_session_snapshot",
203
+ fake_persist_session_snapshot,
204
+ )
205
+
206
+ first_session = SimpleNamespace(
207
+ claude_counted=False,
208
+ session=SimpleNamespace(
209
+ config=SimpleNamespace(model_name="openai/gpt-5.5"),
210
+ ),
211
+ )
212
+ second_session = SimpleNamespace(
213
+ claude_counted=False,
214
+ session=SimpleNamespace(
215
+ config=SimpleNamespace(model_name="openai/gpt-5.5"),
216
+ ),
217
+ )
218
+
219
+ await agent._enforce_premium_model_quota(
220
+ {"user_id": "org-user", "plan": "org"},
221
+ first_session,
222
+ )
223
+ with pytest.raises(HTTPException) as exc_info:
224
+ await agent._enforce_premium_model_quota(
225
+ {"user_id": "org-user", "plan": "org"},
226
+ second_session,
227
+ )
228
+
229
+ assert exc_info.value.status_code == 429
230
+ assert exc_info.value.detail["plan"] == "org"
231
+ assert "Upgrade to HF Pro" in exc_info.value.detail["message"]
232
+
233
+
234
+ @pytest.mark.asyncio
235
+ async def test_premium_quota_skips_direct_anthropic(monkeypatch):
236
  async def fail_if_persisted(_agent_session):
237
+ raise AssertionError("direct Anthropic should not consume premium quota")
238
 
239
  monkeypatch.setattr(
240
  agent.session_manager,
 
249
  ),
250
  )
251
 
252
+ await agent._enforce_premium_model_quota(
253
  {"user_id": "u1", "plan": "free"},
254
  agent_session,
255
  )
tests/unit/test_plan_normalization.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for Hugging Face plan normalization."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ import pytest
7
+
8
+ _BACKEND_DIR = Path(__file__).resolve().parent.parent.parent / "backend"
9
+ if str(_BACKEND_DIR) not in sys.path:
10
+ sys.path.insert(0, str(_BACKEND_DIR))
11
+
12
+ import dependencies # noqa: E402
13
+
14
+
15
+ def test_oauth_is_pro_flag_takes_priority_over_user_type():
16
+ assert dependencies._normalize_user_plan({"type": "user", "isPro": True}) == "pro"
17
+
18
+
19
+ @pytest.mark.parametrize(
20
+ "payload",
21
+ [
22
+ {"is_pro": True},
23
+ {"accountType": "pro"},
24
+ {"plan": "HF Pro"},
25
+ {"subscription": "hf_pro"},
26
+ {"accountType": "team"},
27
+ {"plan": "enterprise"},
28
+ {"tier": "promotional"},
29
+ ],
30
+ )
31
+ def test_non_ispro_signals_stay_free(payload):
32
+ assert dependencies._normalize_user_plan(payload) == "free"
33
+
34
+
35
+ def test_free_user_with_free_org_stays_free():
36
+ whoami = {
37
+ "name": "alice",
38
+ "type": "user",
39
+ "orgs": [{"name": "oss-friends", "plan": "free"}],
40
+ }
41
+
42
+ assert dependencies._normalize_user_plan(whoami) == "free"
43
+
44
+
45
+ def test_user_with_paid_org_without_personal_pro_stays_free():
46
+ whoami = {
47
+ "name": "alice",
48
+ "type": "user",
49
+ "orgs": [{"name": "team-a", "plan": "team"}],
50
+ }
51
+
52
+ assert dependencies._normalize_user_plan(whoami) == "free"
53
+
54
+
55
+ @pytest.mark.parametrize("payload", [None, [], {"type": "user"}, {"plan": "free"}])
56
+ def test_unknown_or_malformed_payload_defaults_to_free(payload):
57
+ assert dependencies._normalize_user_plan(payload) == "free"
tests/unit/test_user_quotas.py CHANGED
@@ -27,16 +27,13 @@ def _reset_store():
27
  def test_daily_cap_for_known_plans():
28
  assert user_quotas.daily_cap_for("free") == user_quotas.CLAUDE_FREE_DAILY
29
  assert user_quotas.daily_cap_for("pro") == user_quotas.CLAUDE_PRO_DAILY
30
- assert user_quotas.daily_cap_for("org") == user_quotas.CLAUDE_PRO_DAILY
31
 
32
 
33
  def test_daily_cap_for_unknown_or_missing_defaults_to_free():
34
  assert user_quotas.daily_cap_for(None) == user_quotas.CLAUDE_FREE_DAILY
35
  assert user_quotas.daily_cap_for("") == user_quotas.CLAUDE_FREE_DAILY
36
- # Anything we don't recognize as the Pro/Org tier gets the Pro cap because
37
- # the function's contract is "free" is the only downgraded tier. If that
38
- # ever flips, this test will flip too — adjust consciously.
39
- assert user_quotas.daily_cap_for("mystery") == user_quotas.CLAUDE_PRO_DAILY
40
 
41
 
42
  @pytest.mark.asyncio
 
27
  def test_daily_cap_for_known_plans():
28
  assert user_quotas.daily_cap_for("free") == user_quotas.CLAUDE_FREE_DAILY
29
  assert user_quotas.daily_cap_for("pro") == user_quotas.CLAUDE_PRO_DAILY
30
+ assert user_quotas.daily_cap_for("org") == user_quotas.CLAUDE_FREE_DAILY
31
 
32
 
33
  def test_daily_cap_for_unknown_or_missing_defaults_to_free():
34
  assert user_quotas.daily_cap_for(None) == user_quotas.CLAUDE_FREE_DAILY
35
  assert user_quotas.daily_cap_for("") == user_quotas.CLAUDE_FREE_DAILY
36
+ assert user_quotas.daily_cap_for("mystery") == user_quotas.CLAUDE_FREE_DAILY
 
 
 
37
 
38
 
39
  @pytest.mark.asyncio