ml-intern

Sleeping

App Files Files Community

lewtun HF Staff committed on Apr 30

Commit

5db99fa

unverified ·

1 Parent(s): 7867a7a

Expose OpenAI models in the UI & make Claude model picker configurable (#183)

Browse files

* Add agent dev server notes

* Make frontend model configurable

* Support env-selected frontend models

* Use Claude-specific model env var

* Add GPT-5.5 to web model picker

* Gate GPT-5.5 as a premium model

* Avoid duplicate session model fetch

* Remove legacy Claude quota aliases

* Document GitHub CLI PR body workflow

* Gate only deployed paid model IDs

* Nits

Files changed (13) hide show

AGENTS.md +20 -0
backend/dependencies.py +1 -1
backend/routes/agent.py +100 -72
backend/user_quotas.py +9 -6
configs/frontend_agent_config.json +1 -1
frontend/src/components/Chat/ChatInput.tsx +79 -27
frontend/src/components/ClaudeCapDialog.tsx +7 -7
frontend/src/hooks/useAgentChat.ts +1 -1
frontend/src/hooks/useUserQuota.ts +7 -7
frontend/src/lib/sse-chat-transport.ts +1 -1
frontend/src/store/agentStore.ts +1 -1
frontend/src/utils/model.ts +6 -1
tests/unit/test_agent_model_gating.py +129 -0

AGENTS.md ADDED Viewed

	@@ -0,0 +1,20 @@

+# Agent Notes
+## Local Dev Servers
+- Frontend: from `frontend/`, run `npm ci` if dependencies are missing, then `npm run dev`.
+- Backend: from `backend/`, run `uv run uvicorn main:app --host ::1 --port 7860`.
+- Frontend URL: http://localhost:5173/
+- Backend health check: `curl -g http://[::1]:7860/api`
+- Frontend proxy health check: `curl http://localhost:5173/api`
+Notes:
+- Vite proxies `/api` and `/auth` to `http://localhost:7860`.
+- If `127.0.0.1:7860` is already owned by another local process, binding the backend to `::1` lets the Vite proxy resolve `localhost` cleanly.
+- Prefer `npm ci` over `npm install` for setup, since `npm install` may rewrite `frontend/package-lock.json` metadata depending on npm version.
+- Production defaults to the Bedrock Claude model. For local development with a personal Anthropic key, set `ANTHROPIC_API_KEY` and `ML_INTERN_CLAUDE_MODEL_ID=anthropic/claude-opus-4-6` before starting the backend. Other models are selected through the app's model switcher.
+## GitHub CLI
+- For multiline PR descriptions, prefer `gh pr edit <number> --body-file <file>` over inline `--body` so shell quoting, `$` env-var names, backticks, and newlines are preserved correctly.

backend/dependencies.py CHANGED Viewed

@@ -111,7 +111,7 @@ async def _fetch_user_plan(token: str) -> str:
     # OAuth whoami sets `type: "user"` and surfaces Pro via the `isPro` boolean
     # — see Space discussion #21. HF-Jobs eligibility (PR #172) ignores plan
-    # entirely; the Claude daily-cap tier is still a free vs pro/org split.
     if whoami.get("isPro") is True or whoami.get("is_pro") is True:
         return "pro"
     plan_str = ""

     # OAuth whoami sets `type: "user"` and surfaces Pro via the `isPro` boolean
     # — see Space discussion #21. HF-Jobs eligibility (PR #172) ignores plan
+    # entirely; the premium-model daily-cap tier is still a free vs pro/org split.
     if whoami.get("isPro") is True or whoami.get("is_pro") is True:
         return "pro"
     plan_str = ""

backend/routes/agent.py CHANGED Viewed

@@ -41,83 +41,111 @@ logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/api", tags=["agent"])
-AVAILABLE_MODELS = [
-    {
-        "id": "moonshotai/Kimi-K2.6",
-        "label": "Kimi K2.6",
-        "provider": "huggingface",
-        "tier": "free",
-        "recommended": True,
-    },
-    {
-        "id": "bedrock/us.anthropic.claude-opus-4-6-v1",
-        "label": "Claude Opus 4.6",
-        "provider": "anthropic",
-        "tier": "pro",
-        "recommended": True,
-    },
-    {
-        "id": "MiniMaxAI/MiniMax-M2.7",
-        "label": "MiniMax M2.7",
-        "provider": "huggingface",
-        "tier": "free",
-    },
-    {
-        "id": "zai-org/GLM-5.1",
-        "label": "GLM 5.1",
-        "provider": "huggingface",
-        "tier": "free",
-    },
-]
-def _is_anthropic_model(model_id: str) -> bool:
-    return "anthropic" in model_id
-async def _require_hf_for_anthropic(request: Request, model_id: str) -> None:
-    """403 if a non-``huggingface``-org user tries to select an Anthropic model.
-    Anthropic models are billed to the Space's ``ANTHROPIC_API_KEY``; every
-    other model in ``AVAILABLE_MODELS`` is routed through HF Router and
-    billed via ``X-HF-Bill-To``. The gate only fires for Anthropic so
-    non-HF users can still freely switch between the free models.
-    Pattern: https://github.com/huggingface/ml-intern/pull/63
     """
-    if not _is_anthropic_model(model_id):
         return
     if not await require_huggingface_org_member(request):
         raise HTTPException(
             status_code=403,
             detail={
-                "error": "anthropic_restricted",
                 "message": (
-                    "Opus is gated to HF staff. Pick a free model — "
                     "Kimi K2.6, MiniMax M2.7, or GLM 5.1 — instead."
                 ),
             },
         )
-async def _enforce_claude_quota(
     user: dict[str, Any],
     agent_session: AgentSession,
 ) -> None:
-    """Charge the user's daily Claude quota on first use of Anthropic in a session.
     Runs at *message-submit* time, not session-create time — so spinning up a
-    Claude session to look around doesn't burn quota. The ``claude_counted``
-    flag on ``AgentSession`` guards against re-counting the same session.
-    No-ops when the session's current model isn't Anthropic, or when this
     session has already been charged. Raises 429 when the user has hit
     their daily cap.
     """
     if agent_session.claude_counted:
         return
     model_name = agent_session.session.config.model_name
-    if not _is_anthropic_model(model_name):
         return
     user_id = user["user_id"]
     cap = user_quotas.daily_cap_for(user.get("plan"))
@@ -126,11 +154,11 @@ async def _enforce_claude_quota(
         raise HTTPException(
             status_code=429,
             detail={
-                "error": "claude_daily_cap",
                 "plan": user.get("plan", "free"),
                 "cap": cap,
                 "message": (
-                    "Daily Claude limit reached. Upgrade to HF Pro for "
                     f"{user_quotas.CLAUDE_PRO_DAILY}/day or use a free model."
                 ),
             },
@@ -306,8 +334,8 @@ async def create_session(
     behalf of the user.
     Optional body ``{"model"?: <id>}`` selects the session's LLM; unknown
-    ids are rejected (400). The Claude-quota gate runs at message-submit
-    time, not here — spinning up an Opus session to look around is free.
     Returns 503 if the server or user has reached the session limit.
     """
@@ -327,10 +355,9 @@ async def create_session(
     if model and model not in valid_ids:
         raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
-    # Opus is gated to HF staff (PR #63). Only fires when the resolved model
-    # is Anthropic; free models pass through.
     resolved_model = model or session_manager.config.model_name
-    await _require_hf_for_anthropic(request, resolved_model)
     try:
         session_id = await session_manager.create_session(
@@ -355,7 +382,7 @@ async def restore_session_summary(
     session's context as a user-role system note.
     Optional ``"model"`` in the body overrides the session's LLM. The
-    Claude-quota gate runs at message-submit time, not here.
     """
     messages = body.get("messages")
     if not isinstance(messages, list) or not messages:
@@ -369,7 +396,7 @@ async def restore_session_summary(
         raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
     resolved_model = model or session_manager.config.model_name
-    await _require_hf_for_anthropic(request, resolved_model)
     try:
         session_id = await session_manager.create_session(
@@ -417,10 +444,10 @@ async def set_session_model(
     Takes effect on the next LLM call in that session — other sessions
     (including other browser tabs) are unaffected. Model switches don't
-    charge quota — the Claude-quota gate only fires at message-submit time.
-    Switching TO an Anthropic model requires HF org membership (PR #63);
-    free-model switches are unrestricted.
     """
     agent_session = await _check_session_access(session_id, user, request)
     model_id = body.get("model")
@@ -429,7 +456,7 @@ async def set_session_model(
     valid_ids = {m["id"] for m in AVAILABLE_MODELS}
     if model_id not in valid_ids:
         raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
-    await _require_hf_for_anthropic(request, model_id)
     if not agent_session:
         raise HTTPException(status_code=404, detail="Session not found")
     await session_manager.update_session_model(session_id, model_id)
@@ -463,15 +490,16 @@ async def set_session_notifications(
 @router.get("/user/quota")
 async def get_user_quota(user: dict = Depends(get_current_user)) -> dict:
-    """Return the user's plan tier and today's Claude-session quota state."""
     plan = user.get("plan", "free")
     used = await user_quotas.get_claude_used_today(user["user_id"])
     cap = user_quotas.daily_cap_for(plan)
     return {
         "plan": plan,
-        "claude_used_today": used,
-        "claude_daily_cap": cap,
-        "claude_remaining": max(0, cap - used),
     }
@@ -518,7 +546,7 @@ async def submit_input(
 ) -> dict:
     """Submit user input to a session. Only accessible by the session owner."""
     agent_session = await _check_session_access(request.session_id, user)
-    await _enforce_claude_quota(user, agent_session)
     success = await session_manager.submit_user_input(request.session_id, request.text)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
@@ -570,12 +598,12 @@ async def chat_sse(
     text = body.get("text")
     approvals = body.get("approvals")
-    # Gate user-message sends against the daily Claude quota. Approvals are
     # continuations of an in-progress turn — the session was already charged
     # on its first message, so we skip the gate there.
     if text is not None and not approvals:
         try:
-            await _enforce_claude_quota(user, agent_session)
         except HTTPException:
             broadcaster.unsubscribe(sub_id)
             raise

 router = APIRouter(prefix="/api", tags=["agent"])
+DEFAULT_CLAUDE_MODEL_ID = "bedrock/us.anthropic.claude-opus-4-6-v1"
+GATED_MODEL_IDS = {
+    DEFAULT_CLAUDE_MODEL_ID,
+    "openai/gpt-5.5",
+}
+def _claude_picker_model_id() -> str:
+    """Return the model ID used by the Claude option in the UI.
+    The frontend config sets ``session_manager.config.model_name`` from
+    ``ML_INTERN_CLAUDE_MODEL_ID`` when that env var is present, otherwise it
+    falls back to the production Bedrock Claude model. This function only
+    exposes that resolved config value for the Claude picker; non-Claude models
+    are listed separately in the model switcher.
+    """
+    return session_manager.config.model_name
+def _available_models() -> list[dict[str, Any]]:
+    models = [
+        {
+            "id": "moonshotai/Kimi-K2.6",
+            "label": "Kimi K2.6",
+            "provider": "huggingface",
+            "tier": "free",
+            "recommended": True,
+        },
+        {
+            "id": _claude_picker_model_id(),
+            "label": "Claude Opus 4.6",
+            "provider": "anthropic",
+            "tier": "pro",
+            "recommended": True,
+        },
+        {
+            "id": "openai/gpt-5.5",
+            "label": "GPT-5.5",
+            "provider": "openai",
+            "tier": "pro",
+        },
+        {
+            "id": "MiniMaxAI/MiniMax-M2.7",
+            "label": "MiniMax M2.7",
+            "provider": "huggingface",
+            "tier": "free",
+        },
+        {
+            "id": "zai-org/GLM-5.1",
+            "label": "GLM 5.1",
+            "provider": "huggingface",
+            "tier": "free",
+        },
+    ]
+    return models
+AVAILABLE_MODELS = _available_models()
+def _is_gated_model(model_id: str) -> bool:
+    return model_id in GATED_MODEL_IDS
+async def _require_hf_for_gated_model(request: Request, model_id: str) -> None:
+    """403 if a non-``huggingface``-org user tries to select a gated model.
+    Gated models are deployed paid endpoints backed by service-owned
+    credentials. The gate only fires for deployed paid models so non-HF users
+    can still freely switch between the free models.
     """
+    if not _is_gated_model(model_id):
         return
     if not await require_huggingface_org_member(request):
         raise HTTPException(
             status_code=403,
             detail={
+                "error": "premium_model_restricted",
                 "message": (
+                    "Premium models are gated to HF staff. Pick a free model — "
                     "Kimi K2.6, MiniMax M2.7, or GLM 5.1 — instead."
                 ),
             },
         )
+async def _enforce_gated_model_quota(
     user: dict[str, Any],
     agent_session: AgentSession,
 ) -> None:
+    """Charge the user's daily gated-model quota on first use in a session.
     Runs at *message-submit* time, not session-create time — so spinning up a
+    gated-model session to look around doesn't burn quota. The
+    ``claude_counted`` flag on ``AgentSession`` guards against re-counting the
+    same session; the stored field name is kept for persistence compatibility.
+    No-ops when the session's current model isn't gated, or when this
     session has already been charged. Raises 429 when the user has hit
     their daily cap.
     """
     if agent_session.claude_counted:
         return
     model_name = agent_session.session.config.model_name
+    if not _is_gated_model(model_name):
         return
     user_id = user["user_id"]
     cap = user_quotas.daily_cap_for(user.get("plan"))
         raise HTTPException(
             status_code=429,
             detail={
+                "error": "premium_model_daily_cap",
                 "plan": user.get("plan", "free"),
                 "cap": cap,
                 "message": (
+                    "Daily premium model limit reached. Upgrade to HF Pro for "
                     f"{user_quotas.CLAUDE_PRO_DAILY}/day or use a free model."
                 ),
             },
     behalf of the user.
     Optional body ``{"model"?: <id>}`` selects the session's LLM; unknown
+    ids are rejected (400). The gated-model quota runs at message-submit
+    time, not here — spinning up a session to look around is free.
     Returns 503 if the server or user has reached the session limit.
     """
     if model and model not in valid_ids:
         raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
+    # Deployed paid models are gated to HF staff; free and local-dev models pass through.
     resolved_model = model or session_manager.config.model_name
+    await _require_hf_for_gated_model(request, resolved_model)
     try:
         session_id = await session_manager.create_session(
     session's context as a user-role system note.
     Optional ``"model"`` in the body overrides the session's LLM. The
+    gated-model quota runs at message-submit time, not here.
     """
     messages = body.get("messages")
     if not isinstance(messages, list) or not messages:
         raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
     resolved_model = model or session_manager.config.model_name
+    await _require_hf_for_gated_model(request, resolved_model)
     try:
         session_id = await session_manager.create_session(
     Takes effect on the next LLM call in that session — other sessions
     (including other browser tabs) are unaffected. Model switches don't
+    charge quota — the gated-model quota only fires at message-submit time.
+    Switching TO a gated deployed model requires HF org membership; free-model
+    and local-dev direct provider switches are unrestricted.
     """
     agent_session = await _check_session_access(session_id, user, request)
     model_id = body.get("model")
     valid_ids = {m["id"] for m in AVAILABLE_MODELS}
     if model_id not in valid_ids:
         raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
+    await _require_hf_for_gated_model(request, model_id)
     if not agent_session:
         raise HTTPException(status_code=404, detail="Session not found")
     await session_manager.update_session_model(session_id, model_id)
 @router.get("/user/quota")
 async def get_user_quota(user: dict = Depends(get_current_user)) -> dict:
+    """Return the user's plan tier and today's premium-model quota state."""
     plan = user.get("plan", "free")
     used = await user_quotas.get_claude_used_today(user["user_id"])
     cap = user_quotas.daily_cap_for(plan)
+    remaining = max(0, cap - used)
     return {
         "plan": plan,
+        "premium_used_today": used,
+        "premium_daily_cap": cap,
+        "premium_remaining": remaining,
     }
 ) -> dict:
     """Submit user input to a session. Only accessible by the session owner."""
     agent_session = await _check_session_access(request.session_id, user)
+    await _enforce_gated_model_quota(user, agent_session)
     success = await session_manager.submit_user_input(request.session_id, request.text)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
     text = body.get("text")
     approvals = body.get("approvals")
+    # Gate user-message sends against the daily gated-model quota. Approvals are
     # continuations of an in-progress turn — the session was already charged
     # on its first message, so we skip the gate there.
     if text is not None and not approvals:
         try:
+            await _enforce_gated_model_quota(user, agent_session)
         except HTTPException:
             broadcaster.unsubscribe(sub_id)
             raise

backend/user_quotas.py CHANGED Viewed

@@ -1,12 +1,15 @@
-"""Daily quota for Claude session creations.
-Tracks per-user Claude session starts against a daily cap derived from the
-user's HF plan. MongoDB is the source of truth when configured; the
 in-process dict remains the fallback for local/dev/test runs.
-Unit: session *creations*, not messages. A user who selects Claude in a new
-session consumes one quota point; switching an existing Claude session to
-Claude again doesn't (`AgentSession.claude_counted` guards that).
 Cap tiers:
   free user   → CLAUDE_FREE_DAILY (1)

+"""Daily quota for premium model session creations.
+Tracks per-user premium model session starts against a daily cap derived from
+the user's HF plan. MongoDB is the source of truth when configured; the
 in-process dict remains the fallback for local/dev/test runs.
+The public names still say ``claude`` because this quota bucket originally
+only covered Claude and the persisted session field uses that name.
+Unit: session *creations*, not messages. A user who sends with a premium model
+in a new session consumes one quota point; switching an already-counted session
+back to a premium model doesn't (`AgentSession.claude_counted` guards that).
 Cap tiers:
   free user   → CLAUDE_FREE_DAILY (1)

configs/frontend_agent_config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "model_name": "bedrock/us.anthropic.claude-opus-4-6-v1",
   "save_sessions": true,
   "session_dataset_repo": "smolagents/ml-intern-sessions",
   "yolo_mode": false,

 {
+  "model_name": "${ML_INTERN_CLAUDE_MODEL_ID:-bedrock/us.anthropic.claude-opus-4-6-v1}",
   "save_sessions": true,
   "session_dataset_repo": "smolagents/ml-intern-sessions",
   "yolo_mode": false,

frontend/src/components/Chat/ChatInput.tsx CHANGED Viewed

@@ -8,7 +8,13 @@ import { useUserQuota } from '@/hooks/useUserQuota';
 import ClaudeCapDialog from '@/components/ClaudeCapDialog';
 import JobsUpgradeDialog from '@/components/JobsUpgradeDialog';
 import { useAgentStore } from '@/store/agentStore';
-import { CLAUDE_MODEL_PATH, FIRST_FREE_MODEL_PATH, isClaudePath } from '@/utils/model';
 // Model configuration
 interface ModelOption {
@@ -25,7 +31,7 @@ const getHfAvatarUrl = (modelId: string) => {
   return `https://huggingface.co/api/avatars/${org}`;
 };
-const MODEL_OPTIONS: ModelOption[] = [
   {
     id: 'kimi-k2.6',
     name: 'Kimi K2.6',
@@ -42,6 +48,13 @@ const MODEL_OPTIONS: ModelOption[] = [
     avatarUrl: 'https://huggingface.co/api/avatars/Anthropic',
     recommended: true,
   },
   {
     id: 'minimax-m2.7',
     name: 'MiniMax M2.7',
@@ -58,8 +71,8 @@ const MODEL_OPTIONS: ModelOption[] = [
   },
 ];
-const findModelByPath = (path: string): ModelOption | undefined => {
-  return MODEL_OPTIONS.find(m => m.modelPath === path || path?.includes(m.id));
 };
 interface ChatInputProps {
@@ -72,16 +85,20 @@ interface ChatInputProps {
 }
 const isClaudeModel = (m: ModelOption) => isClaudePath(m.modelPath);
-const firstFreeModel = () => MODEL_OPTIONS.find(m => !isClaudeModel(m)) ?? MODEL_OPTIONS[0];
 export default function ChatInput({ sessionId, onSend, onStop, isProcessing = false, disabled = false, placeholder = 'Ask anything...' }: ChatInputProps) {
   const [input, setInput] = useState('');
   const inputRef = useRef<HTMLTextAreaElement>(null);
-  const [selectedModelId, setSelectedModelId] = useState<string>(MODEL_OPTIONS[0].id);
   const [modelAnchorEl, setModelAnchorEl] = useState<null | HTMLElement>(null);
   const { quota, refresh: refreshQuota } = useUserQuota();
   // The daily-cap dialog is triggered from two places: (a) a 429 returned
-  // from the chat transport when the user tries to send on Opus over cap —
   // surfaced via the agent-store flag — and (b) nothing else right now
   // (switching models is free). Keeping the open state in the store means
   // the hook layer can flip it without threading props through.
@@ -92,6 +109,41 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
   const [awaitingTopUp, setAwaitingTopUp] = useState(false);
   const lastSentRef = useRef<string>('');
   // Model is per-session: fetch this tab's current model every time the
   // session changes. Other tabs keep their own selections independently.
   useEffect(() => {
@@ -102,7 +154,7 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
       .then((data) => {
         if (cancelled) return;
         if (data?.model) {
-          const model = findModelByPath(data.model);
           if (model) setSelectedModelId(model.id);
         }
       })
@@ -110,7 +162,7 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
     return () => { cancelled = true; };
   }, [sessionId]);
-  const selectedModel = MODEL_OPTIONS.find(m => m.id === selectedModelId) || MODEL_OPTIONS[0];
   // Auto-focus the textarea when the session becomes ready
   useEffect(() => {
@@ -127,7 +179,7 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
     }
   }, [input, disabled, onSend]);
-  // When the chat transport reports a Claude-quota 429, restore the typed
   // text so the user doesn't lose their message.
   useEffect(() => {
     if (claudeQuotaExhausted && lastSentRef.current) {
@@ -178,12 +230,12 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
   }, [setClaudeQuotaExhausted]);
   // "Use a free model" — switch the current session to Kimi (or the first
-  // non-Anthropic option) and auto-retry the send that tripped the cap.
   const handleUseFreeModel = useCallback(async () => {
     setClaudeQuotaExhausted(false);
     if (!sessionId) return;
-    const free = MODEL_OPTIONS.find(m => m.modelPath === FIRST_FREE_MODEL_PATH)
-      ?? firstFreeModel();
     try {
       const res = await apiFetch(`/api/session/${sessionId}/model`, {
         method: 'POST',
@@ -199,14 +251,14 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
         }
       }
     } catch { /* ignore */ }
-  }, [sessionId, onSend, setClaudeQuotaExhausted]);
-  const handleClaudeUpgradeClick = useCallback(async () => {
     if (!sessionId) return;
     try {
       await apiFetch(`/api/pro-click/${sessionId}`, {
         method: 'POST',
-        body: JSON.stringify({ source: 'claude_cap_dialog', target: 'pro_pricing' }),
       });
     } catch {
       /* tracking is best-effort */
@@ -254,14 +306,14 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
     return () => document.removeEventListener('visibilitychange', onVisible);
   }, [awaitingTopUp, jobsUpgradeRequired, handleJobsRetry]);
-  // Hide the chip until the user has actually burned quota — an unused
-  // Opus session shouldn't populate a counter.
-  const claudeChip = (() => {
-    if (!quota || quota.claudeUsedToday === 0) return null;
     if (quota.plan === 'free') {
-      return quota.claudeRemaining > 0 ? 'Free today' : 'Pro only';
     }
-    return `${quota.claudeUsedToday}/${quota.claudeDailyCap} today`;
   })();
   return (
@@ -426,7 +478,7 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
             }
           }}
         >
-          {MODEL_OPTIONS.map((model) => (
             <MenuItem
               key={model.id}
               onClick={() => handleSelectModel(model)}
@@ -462,9 +514,9 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
                         }}
                       />
                     )}
-                    {isClaudeModel(model) && claudeChip && (
                       <Chip
-                        label={claudeChip}
                         size="small"
                         sx={{
                           height: '18px',
@@ -489,10 +541,10 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
         <ClaudeCapDialog
           open={claudeQuotaExhausted}
           plan={quota?.plan ?? 'free'}
-          cap={quota?.claudeDailyCap ?? 1}
           onClose={handleCapDialogClose}
           onUseFreeModel={handleUseFreeModel}
-          onUpgrade={handleClaudeUpgradeClick}
         />
         <JobsUpgradeDialog
           open={!!jobsUpgradeRequired}

 import ClaudeCapDialog from '@/components/ClaudeCapDialog';
 import JobsUpgradeDialog from '@/components/JobsUpgradeDialog';
 import { useAgentStore } from '@/store/agentStore';
+import {
+  CLAUDE_MODEL_PATH,
+  FIRST_FREE_MODEL_PATH,
+  GPT_55_MODEL_PATH,
+  isClaudePath,
+  isPremiumPath,
+} from '@/utils/model';
 // Model configuration
 interface ModelOption {
   return `https://huggingface.co/api/avatars/${org}`;
 };
+const DEFAULT_MODEL_OPTIONS: ModelOption[] = [
   {
     id: 'kimi-k2.6',
     name: 'Kimi K2.6',
     avatarUrl: 'https://huggingface.co/api/avatars/Anthropic',
     recommended: true,
   },
+  {
+    id: 'gpt-5.5',
+    name: 'GPT-5.5',
+    description: 'OpenAI',
+    modelPath: GPT_55_MODEL_PATH,
+    avatarUrl: 'https://huggingface.co/api/avatars/openai',
+  },
   {
     id: 'minimax-m2.7',
     name: 'MiniMax M2.7',
   },
 ];
+const findModelByPath = (path: string, options: ModelOption[]): ModelOption | undefined => {
+  return options.find(m => m.modelPath === path || path?.includes(m.id));
 };
 interface ChatInputProps {
 }
 const isClaudeModel = (m: ModelOption) => isClaudePath(m.modelPath);
+const isPremiumModel = (m: ModelOption) => isPremiumPath(m.modelPath);
+const firstFreeModel = (options: ModelOption[]) => options.find(m => !isPremiumModel(m)) ?? options[0];
 export default function ChatInput({ sessionId, onSend, onStop, isProcessing = false, disabled = false, placeholder = 'Ask anything...' }: ChatInputProps) {
   const [input, setInput] = useState('');
   const inputRef = useRef<HTMLTextAreaElement>(null);
+  const [modelOptions, setModelOptions] = useState<ModelOption[]>(DEFAULT_MODEL_OPTIONS);
+  const modelOptionsRef = useRef<ModelOption[]>(DEFAULT_MODEL_OPTIONS);
+  const sessionIdRef = useRef<string | undefined>(sessionId);
+  const [selectedModelId, setSelectedModelId] = useState<string>(DEFAULT_MODEL_OPTIONS[0].id);
   const [modelAnchorEl, setModelAnchorEl] = useState<null | HTMLElement>(null);
   const { quota, refresh: refreshQuota } = useUserQuota();
   // The daily-cap dialog is triggered from two places: (a) a 429 returned
+  // from the chat transport when the user tries to send on a premium model over cap —
   // surfaced via the agent-store flag — and (b) nothing else right now
   // (switching models is free). Keeping the open state in the store means
   // the hook layer can flip it without threading props through.
   const [awaitingTopUp, setAwaitingTopUp] = useState(false);
   const lastSentRef = useRef<string>('');
+  useEffect(() => {
+    modelOptionsRef.current = modelOptions;
+  }, [modelOptions]);
+  useEffect(() => {
+    sessionIdRef.current = sessionId;
+  }, [sessionId]);
+  useEffect(() => {
+    let cancelled = false;
+    apiFetch('/api/config/model')
+      .then((res) => (res.ok ? res.json() : null))
+      .then((data) => {
+        if (cancelled || !data?.available) return;
+        const claude = data.available.find((m: { provider?: string; id?: string }) => (
+          m.provider === 'anthropic' && m.id
+        ));
+        if (!claude?.id) return;
+        const next = DEFAULT_MODEL_OPTIONS.map((option) => (
+          isClaudeModel(option)
+            ? { ...option, modelPath: claude.id, name: claude.label ?? option.name }
+            : option
+        ));
+        modelOptionsRef.current = next;
+        setModelOptions(next);
+        if (!sessionIdRef.current) {
+          const current = data.current ? findModelByPath(data.current, next) : null;
+          if (current) setSelectedModelId(current.id);
+        }
+      })
+      .catch(() => { /* ignore */ });
+    return () => { cancelled = true; };
+  }, []);
   // Model is per-session: fetch this tab's current model every time the
   // session changes. Other tabs keep their own selections independently.
   useEffect(() => {
       .then((data) => {
         if (cancelled) return;
         if (data?.model) {
+          const model = findModelByPath(data.model, modelOptionsRef.current);
           if (model) setSelectedModelId(model.id);
         }
       })
     return () => { cancelled = true; };
   }, [sessionId]);
+  const selectedModel = modelOptions.find(m => m.id === selectedModelId) || modelOptions[0];
   // Auto-focus the textarea when the session becomes ready
   useEffect(() => {
     }
   }, [input, disabled, onSend]);
+  // When the chat transport reports a premium-model quota 429, restore the typed
   // text so the user doesn't lose their message.
   useEffect(() => {
     if (claudeQuotaExhausted && lastSentRef.current) {
   }, [setClaudeQuotaExhausted]);
   // "Use a free model" — switch the current session to Kimi (or the first
+  // non-premium option) and auto-retry the send that tripped the cap.
   const handleUseFreeModel = useCallback(async () => {
     setClaudeQuotaExhausted(false);
     if (!sessionId) return;
+    const free = modelOptions.find(m => m.modelPath === FIRST_FREE_MODEL_PATH)
+      ?? firstFreeModel(modelOptions);
     try {
       const res = await apiFetch(`/api/session/${sessionId}/model`, {
         method: 'POST',
         }
       }
     } catch { /* ignore */ }
+  }, [sessionId, onSend, setClaudeQuotaExhausted, modelOptions]);
+  const handlePremiumUpgradeClick = useCallback(async () => {
     if (!sessionId) return;
     try {
       await apiFetch(`/api/pro-click/${sessionId}`, {
         method: 'POST',
+        body: JSON.stringify({ source: 'premium_cap_dialog', target: 'pro_pricing' }),
       });
     } catch {
       /* tracking is best-effort */
     return () => document.removeEventListener('visibilitychange', onVisible);
   }, [awaitingTopUp, jobsUpgradeRequired, handleJobsRetry]);
+  // Hide the chip until the user has actually burned quota; opening a
+  // premium-model session without sending should not populate a counter.
+  const premiumChip = (() => {
+    if (!quota || quota.premiumUsedToday === 0) return null;
     if (quota.plan === 'free') {
+      return quota.premiumRemaining > 0 ? 'Free today' : 'Pro only';
     }
+    return `${quota.premiumUsedToday}/${quota.premiumDailyCap} today`;
   })();
   return (
             }
           }}
         >
+          {modelOptions.map((model) => (
             <MenuItem
               key={model.id}
               onClick={() => handleSelectModel(model)}
                         }}
                       />
                     )}
+                    {isPremiumModel(model) && premiumChip && (
                       <Chip
+                        label={premiumChip}
                         size="small"
                         sx={{
                           height: '18px',
         <ClaudeCapDialog
           open={claudeQuotaExhausted}
           plan={quota?.plan ?? 'free'}
+          cap={quota?.premiumDailyCap ?? 1}
           onClose={handleCapDialogClose}
           onUseFreeModel={handleUseFreeModel}
+          onUpgrade={handlePremiumUpgradeClick}
         />
         <JobsUpgradeDialog
           open={!!jobsUpgradeRequired}

frontend/src/components/ClaudeCapDialog.tsx CHANGED Viewed

@@ -55,15 +55,15 @@ export default function ClaudeCapDialog({
       <DialogTitle
         sx={{ color: 'var(--text)', fontWeight: 700, fontSize: '1rem', pt: 2.5, pb: 0, px: 3 }}
       >
-        You've hit your Opus limit
       </DialogTitle>
       <DialogContent sx={{ px: 3, pt: 1.25, pb: 0 }}>
         <DialogContentText
           sx={{ color: 'var(--muted-text)', fontSize: '0.85rem', lineHeight: 1.6 }}
         >
-          Opus costs an arm and a leg, so we unfortunately have to cap you at {cap}{' '}
-          {cap === 1 ? 'session' : 'sessions'} a day. Give Kimi, MiniMax, or GLM a spin —
-          they are genuinely good and we use them all the time.
         </DialogContentText>
         <Box
           sx={{
@@ -85,14 +85,14 @@ export default function ClaudeCapDialog({
               letterSpacing: '0.02em',
             }}
           >
-            HF Pro ($9/mo) — more Opus, more everything
           </Typography>
           <Typography
             variant="caption"
             sx={{ display: 'block', color: 'var(--muted-text)', fontSize: '0.78rem', lineHeight: 1.55 }}
           >
-            {PRO_CAP} Opus sessions/day here, 20× HF Inference credits, ZeroGPU access,
-            and priority on Spaces hardware.
           </Typography>
         </Box>
       </DialogContent>

       <DialogTitle
         sx={{ color: 'var(--text)', fontWeight: 700, fontSize: '1rem', pt: 2.5, pb: 0, px: 3 }}
       >
+        You've hit your premium model limit
       </DialogTitle>
       <DialogContent sx={{ px: 3, pt: 1.25, pb: 0 }}>
         <DialogContentText
           sx={{ color: 'var(--muted-text)', fontSize: '0.85rem', lineHeight: 1.6 }}
         >
+          Opus and GPT-5.5 are expensive to run, so we cap premium models at {cap}{' '}
+          {cap === 1 ? 'session' : 'sessions'} a day. Give Kimi, MiniMax, or GLM a spin
+          instead.
         </DialogContentText>
         <Box
           sx={{
               letterSpacing: '0.02em',
             }}
           >
+            HF Pro ($9/mo) — more premium model sessions
           </Typography>
           <Typography
             variant="caption"
             sx={{ display: 'block', color: 'var(--muted-text)', fontSize: '0.78rem', lineHeight: 1.55 }}
           >
+            {PRO_CAP} premium model sessions/day here, 20× HF Inference credits,
+            ZeroGPU access, and priority on Spaces hardware.
           </Typography>
         </Box>
       </DialogContent>

frontend/src/hooks/useAgentChat.ts CHANGED Viewed

@@ -346,7 +346,7 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
     sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
     onError: (error) => {
       updateSession(sessionId, { isProcessing: false });
-      // Claude daily-cap: open the cap dialog instead of the generic error
       // banner. Transport marks the error with this sentinel.
       if (error.message === 'CLAUDE_QUOTA_EXHAUSTED') {
         if (isActiveRef.current) {

     sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
     onError: (error) => {
       updateSession(sessionId, { isProcessing: false });
+      // Premium-model daily cap: open the cap dialog instead of the generic error
       // banner. Transport marks the error with this sentinel.
       if (error.message === 'CLAUDE_QUOTA_EXHAUSTED') {
         if (isActiveRef.current) {

frontend/src/hooks/useUserQuota.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Reads the current user's Claude daily quota + plan tier from the backend.
  *
  * Fetches once when the user becomes authenticated, and exposes a `refresh()`
  * that callers invoke after a successful session-create / model-switch so the
@@ -13,9 +13,9 @@ export type PlanTier = 'free' | 'pro' | 'org';
 export interface UserQuota {
   plan: PlanTier;
-  claudeUsedToday: number;
-  claudeDailyCap: number;
-  claudeRemaining: number;
 }
 export function useUserQuota() {
@@ -32,9 +32,9 @@ export function useUserQuota() {
       const data = await res.json();
       setQuota({
         plan: (data.plan ?? 'free') as PlanTier,
-        claudeUsedToday: data.claude_used_today ?? 0,
-        claudeDailyCap: data.claude_daily_cap ?? 1,
-        claudeRemaining: data.claude_remaining ?? 0,
       });
     } catch {
       /* backend unreachable — leave previous value */

 /**
+ * Reads the current user's premium-model daily quota + plan tier from the backend.
  *
  * Fetches once when the user becomes authenticated, and exposes a `refresh()`
  * that callers invoke after a successful session-create / model-switch so the
 export interface UserQuota {
   plan: PlanTier;
+  premiumUsedToday: number;
+  premiumDailyCap: number;
+  premiumRemaining: number;
 }
 export function useUserQuota() {
       const data = await res.json();
       setQuota({
         plan: (data.plan ?? 'free') as PlanTier,
+        premiumUsedToday: data.premium_used_today ?? 0,
+        premiumDailyCap: data.premium_daily_cap ?? 1,
+        premiumRemaining: data.premium_remaining ?? 0,
       });
     } catch {
       /* backend unreachable — leave previous value */

frontend/src/lib/sse-chat-transport.ts CHANGED Viewed

@@ -402,7 +402,7 @@ export class SSEChatTransport implements ChatTransport<UIMessage> {
       this.sideChannel.onSessionDead(sessionId);
     }
     if (response.status === 429) {
-      // Claude daily-quota gate tripped. The prefix is the detection marker
       // for useAgentChat's onError handler, which surfaces the cap dialog
       // instead of a generic error banner.
       throw new Error('CLAUDE_QUOTA_EXHAUSTED');

       this.sideChannel.onSessionDead(sessionId);
     }
     if (response.status === 429) {
+      // Premium-model daily quota gate tripped. The error message is the detection marker
       // for useAgentChat's onError handler, which surfaces the cap dialog
       // instead of a generic error banner.
       throw new Error('CLAUDE_QUOTA_EXHAUSTED');

frontend/src/store/agentStore.ts CHANGED Viewed

@@ -113,7 +113,7 @@ interface AgentStore {
   user: User | null;
   error: string | null;
   llmHealthError: LLMHealthError | null;
-  /** Set when a Claude-send hits the daily quota — ChatInput opens the cap dialog in response. */
   claudeQuotaExhausted: boolean;
   jobsUpgradeRequired: JobsUpgradeState | null;

   user: User | null;
   error: string | null;
   llmHealthError: LLMHealthError | null;
+  /** Set when a premium-model send hits the daily quota; ChatInput opens the cap dialog. */
   claudeQuotaExhausted: boolean;
   jobsUpgradeRequired: JobsUpgradeState | null;

frontend/src/utils/model.ts CHANGED Viewed

@@ -1,14 +1,19 @@
 /**
  * Shared model-id constants used by session-create call sites and the
- * ClaudeCapDialog "Use a free model" escape hatch.
  *
  * Keep in sync with MODEL_OPTIONS in components/Chat/ChatInput.tsx and
  * AVAILABLE_MODELS in backend/routes/agent.py.
  */
 export const CLAUDE_MODEL_PATH = 'bedrock/us.anthropic.claude-opus-4-6-v1';
 export const FIRST_FREE_MODEL_PATH = 'moonshotai/Kimi-K2.6';
 export function isClaudePath(modelPath: string | undefined): boolean {
   return !!modelPath && modelPath.includes('anthropic');
 }

 /**
  * Shared model-id constants used by session-create call sites and the
+ * premium-model cap dialog "Use a free model" escape hatch.
  *
  * Keep in sync with MODEL_OPTIONS in components/Chat/ChatInput.tsx and
  * AVAILABLE_MODELS in backend/routes/agent.py.
  */
 export const CLAUDE_MODEL_PATH = 'bedrock/us.anthropic.claude-opus-4-6-v1';
+export const GPT_55_MODEL_PATH = 'openai/gpt-5.5';
 export const FIRST_FREE_MODEL_PATH = 'moonshotai/Kimi-K2.6';
 export function isClaudePath(modelPath: string | undefined): boolean {
   return !!modelPath && modelPath.includes('anthropic');
 }
+export function isPremiumPath(modelPath: string | undefined): boolean {
+  return modelPath === CLAUDE_MODEL_PATH || modelPath === GPT_55_MODEL_PATH;
+}

tests/unit/test_agent_model_gating.py ADDED Viewed

	@@ -0,0 +1,129 @@

+"""Tests for gated model handling in backend/routes/agent.py."""
+import sys
+from pathlib import Path
+from types import SimpleNamespace
+import pytest
+from fastapi import HTTPException
+_BACKEND_DIR = Path(__file__).resolve().parent.parent.parent / "backend"
+if str(_BACKEND_DIR) not in sys.path:
+    sys.path.insert(0, str(_BACKEND_DIR))
+from routes import agent  # noqa: E402
+@pytest.fixture(autouse=True)
+def _reset_quota_store():
+    agent.user_quotas._reset_for_tests()
+    yield
+    agent.user_quotas._reset_for_tests()
+def test_gated_model_predicate_includes_bedrock_claude_and_gpt55_only():
+    assert agent._is_gated_model("bedrock/us.anthropic.claude-opus-4-6-v1")
+    assert agent._is_gated_model("openai/gpt-5.5")
+    assert not agent._is_gated_model("anthropic/claude-opus-4-6")
+    assert not agent._is_gated_model("moonshotai/Kimi-K2.6")
+@pytest.mark.asyncio
+async def test_gated_model_gate_rejects_gpt55_for_non_hf_user(monkeypatch):
+    async def fake_require_hf_org_member(_request):
+        return False
+    monkeypatch.setattr(agent, "require_huggingface_org_member", fake_require_hf_org_member)
+    with pytest.raises(HTTPException) as exc_info:
+        await agent._require_hf_for_gated_model(None, "openai/gpt-5.5")
+    assert exc_info.value.status_code == 403
+    assert exc_info.value.detail["error"] == "premium_model_restricted"
+@pytest.mark.asyncio
+async def test_ungated_models_skip_hf_membership_check(monkeypatch):
+    async def fail_if_called(_request):
+        raise AssertionError("ungated models must not require HF org membership")
+    monkeypatch.setattr(agent, "require_huggingface_org_member", fail_if_called)
+    await agent._require_hf_for_gated_model(None, "moonshotai/Kimi-K2.6")
+    await agent._require_hf_for_gated_model(None, "anthropic/claude-opus-4-6")
+@pytest.mark.asyncio
+async def test_gated_quota_charges_gpt55(monkeypatch):
+    persisted = []
+    async def fake_persist_session_snapshot(agent_session):
+        persisted.append(agent_session)
+    monkeypatch.setattr(
+        agent.session_manager,
+        "persist_session_snapshot",
+        fake_persist_session_snapshot,
+    )
+    agent_session = SimpleNamespace(
+        claude_counted=False,
+        session=SimpleNamespace(
+            config=SimpleNamespace(model_name="openai/gpt-5.5"),
+        ),
+    )
+    await agent._enforce_gated_model_quota(
+        {"user_id": "u1", "plan": "free"},
+        agent_session,
+    )
+    assert agent_session.claude_counted is True
+    assert persisted == [agent_session]
+    assert await agent.user_quotas.get_claude_used_today("u1") == 1
+@pytest.mark.asyncio
+async def test_gated_quota_skips_direct_anthropic(monkeypatch):
+    async def fail_if_persisted(_agent_session):
+        raise AssertionError("direct Anthropic should not consume deployed gated quota")
+    monkeypatch.setattr(
+        agent.session_manager,
+        "persist_session_snapshot",
+        fail_if_persisted,
+    )
+    agent_session = SimpleNamespace(
+        claude_counted=False,
+        session=SimpleNamespace(
+            config=SimpleNamespace(model_name="anthropic/claude-opus-4-6"),
+        ),
+    )
+    await agent._enforce_gated_model_quota(
+        {"user_id": "u1", "plan": "free"},
+        agent_session,
+    )
+    assert agent_session.claude_counted is False
+    assert await agent.user_quotas.get_claude_used_today("u1") == 0
+@pytest.mark.asyncio
+async def test_user_quota_response_uses_premium_fields_only(monkeypatch):
+    async def fake_get_used_today(user_id):
+        assert user_id == "u1"
+        return 2
+    monkeypatch.setattr(agent.user_quotas, "get_claude_used_today", fake_get_used_today)
+    monkeypatch.setattr(agent.user_quotas, "daily_cap_for", lambda plan: 5)
+    response = await agent.get_user_quota({"user_id": "u1", "plan": "pro"})
+    assert response == {
+        "plan": "pro",
+        "premium_used_today": 2,
+        "premium_daily_cap": 5,
+        "premium_remaining": 3,
+    }