Expose OpenAI models in the UI & make Claude model picker configurable (#183)
Browse files
* Add agent dev server notes
* Make frontend model configurable
* Support env-selected frontend models
* Use Claude-specific model env var
* Add GPT-5.5 to web model picker
* Gate GPT-5.5 as a premium model
* Avoid duplicate session model fetch
* Remove legacy Claude quota aliases
* Document GitHub CLI PR body workflow
* Gate only deployed paid model IDs
* Nits
- AGENTS.md +20 -0
- backend/dependencies.py +1 -1
- backend/routes/agent.py +100 -72
- backend/user_quotas.py +9 -6
- configs/frontend_agent_config.json +1 -1
- frontend/src/components/Chat/ChatInput.tsx +79 -27
- frontend/src/components/ClaudeCapDialog.tsx +7 -7
- frontend/src/hooks/useAgentChat.ts +1 -1
- frontend/src/hooks/useUserQuota.ts +7 -7
- frontend/src/lib/sse-chat-transport.ts +1 -1
- frontend/src/store/agentStore.ts +1 -1
- frontend/src/utils/model.ts +6 -1
- tests/unit/test_agent_model_gating.py +129 -0
AGENTS.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Agent Notes
|
| 2 |
+
|
| 3 |
+
## Local Dev Servers
|
| 4 |
+
|
| 5 |
+
- Frontend: from `frontend/`, run `npm ci` if dependencies are missing, then `npm run dev`.
|
| 6 |
+
- Backend: from `backend/`, run `uv run uvicorn main:app --host ::1 --port 7860`.
|
| 7 |
+
- Frontend URL: http://localhost:5173/
|
| 8 |
+
- Backend health check: `curl -g http://[::1]:7860/api`
|
| 9 |
+
- Frontend proxy health check: `curl http://localhost:5173/api`
|
| 10 |
+
|
| 11 |
+
Notes:
|
| 12 |
+
|
| 13 |
+
- Vite proxies `/api` and `/auth` to `http://localhost:7860`.
|
| 14 |
+
- If `127.0.0.1:7860` is already owned by another local process, binding the backend to `::1` lets the Vite proxy resolve `localhost` cleanly.
|
| 15 |
+
- Prefer `npm ci` over `npm install` for setup, since `npm install` may rewrite `frontend/package-lock.json` metadata depending on npm version.
|
| 16 |
+
- Production defaults to the Bedrock Claude model. For local development with a personal Anthropic key, set `ANTHROPIC_API_KEY` and `ML_INTERN_CLAUDE_MODEL_ID=anthropic/claude-opus-4-6` before starting the backend. Other models are selected through the app's model switcher.
|
| 17 |
+
|
| 18 |
+
## GitHub CLI
|
| 19 |
+
|
| 20 |
+
- For multiline PR descriptions, prefer `gh pr edit <number> --body-file <file>` over inline `--body` so shell quoting, `$` env-var names, backticks, and newlines are preserved correctly.
|
backend/dependencies.py
CHANGED
|
@@ -111,7 +111,7 @@ async def _fetch_user_plan(token: str) -> str:
|
|
| 111 |
|
| 112 |
# OAuth whoami sets `type: "user"` and surfaces Pro via the `isPro` boolean
|
| 113 |
# — see Space discussion #21. HF-Jobs eligibility (PR #172) ignores plan
|
| 114 |
-
# entirely; the
|
| 115 |
if whoami.get("isPro") is True or whoami.get("is_pro") is True:
|
| 116 |
return "pro"
|
| 117 |
plan_str = ""
|
|
|
|
| 111 |
|
| 112 |
# OAuth whoami sets `type: "user"` and surfaces Pro via the `isPro` boolean
|
| 113 |
# — see Space discussion #21. HF-Jobs eligibility (PR #172) ignores plan
|
| 114 |
+
# entirely; the premium-model daily-cap tier is still a free vs pro/org split.
|
| 115 |
if whoami.get("isPro") is True or whoami.get("is_pro") is True:
|
| 116 |
return "pro"
|
| 117 |
plan_str = ""
|
backend/routes/agent.py
CHANGED
|
@@ -41,83 +41,111 @@ logger = logging.getLogger(__name__)
|
|
| 41 |
|
| 42 |
router = APIRouter(prefix="/api", tags=["agent"])
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
"""
|
| 88 |
-
if not
|
| 89 |
return
|
| 90 |
if not await require_huggingface_org_member(request):
|
| 91 |
raise HTTPException(
|
| 92 |
status_code=403,
|
| 93 |
detail={
|
| 94 |
-
"error": "
|
| 95 |
"message": (
|
| 96 |
-
"
|
| 97 |
"Kimi K2.6, MiniMax M2.7, or GLM 5.1 — instead."
|
| 98 |
),
|
| 99 |
},
|
| 100 |
)
|
| 101 |
|
| 102 |
|
| 103 |
-
async def
|
| 104 |
user: dict[str, Any],
|
| 105 |
agent_session: AgentSession,
|
| 106 |
) -> None:
|
| 107 |
-
"""Charge the user's daily
|
| 108 |
|
| 109 |
Runs at *message-submit* time, not session-create time — so spinning up a
|
| 110 |
-
|
| 111 |
-
flag on ``AgentSession`` guards against re-counting the
|
|
|
|
| 112 |
|
| 113 |
-
No-ops when the session's current model isn't
|
| 114 |
session has already been charged. Raises 429 when the user has hit
|
| 115 |
their daily cap.
|
| 116 |
"""
|
| 117 |
if agent_session.claude_counted:
|
| 118 |
return
|
| 119 |
model_name = agent_session.session.config.model_name
|
| 120 |
-
if not
|
| 121 |
return
|
| 122 |
user_id = user["user_id"]
|
| 123 |
cap = user_quotas.daily_cap_for(user.get("plan"))
|
|
@@ -126,11 +154,11 @@ async def _enforce_claude_quota(
|
|
| 126 |
raise HTTPException(
|
| 127 |
status_code=429,
|
| 128 |
detail={
|
| 129 |
-
"error": "
|
| 130 |
"plan": user.get("plan", "free"),
|
| 131 |
"cap": cap,
|
| 132 |
"message": (
|
| 133 |
-
"Daily
|
| 134 |
f"{user_quotas.CLAUDE_PRO_DAILY}/day or use a free model."
|
| 135 |
),
|
| 136 |
},
|
|
@@ -306,8 +334,8 @@ async def create_session(
|
|
| 306 |
behalf of the user.
|
| 307 |
|
| 308 |
Optional body ``{"model"?: <id>}`` selects the session's LLM; unknown
|
| 309 |
-
ids are rejected (400). The
|
| 310 |
-
time, not here — spinning up
|
| 311 |
|
| 312 |
Returns 503 if the server or user has reached the session limit.
|
| 313 |
"""
|
|
@@ -327,10 +355,9 @@ async def create_session(
|
|
| 327 |
if model and model not in valid_ids:
|
| 328 |
raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
|
| 329 |
|
| 330 |
-
#
|
| 331 |
-
# is Anthropic; free models pass through.
|
| 332 |
resolved_model = model or session_manager.config.model_name
|
| 333 |
-
await
|
| 334 |
|
| 335 |
try:
|
| 336 |
session_id = await session_manager.create_session(
|
|
@@ -355,7 +382,7 @@ async def restore_session_summary(
|
|
| 355 |
session's context as a user-role system note.
|
| 356 |
|
| 357 |
Optional ``"model"`` in the body overrides the session's LLM. The
|
| 358 |
-
|
| 359 |
"""
|
| 360 |
messages = body.get("messages")
|
| 361 |
if not isinstance(messages, list) or not messages:
|
|
@@ -369,7 +396,7 @@ async def restore_session_summary(
|
|
| 369 |
raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
|
| 370 |
|
| 371 |
resolved_model = model or session_manager.config.model_name
|
| 372 |
-
await
|
| 373 |
|
| 374 |
try:
|
| 375 |
session_id = await session_manager.create_session(
|
|
@@ -417,10 +444,10 @@ async def set_session_model(
|
|
| 417 |
|
| 418 |
Takes effect on the next LLM call in that session — other sessions
|
| 419 |
(including other browser tabs) are unaffected. Model switches don't
|
| 420 |
-
charge quota — the
|
| 421 |
|
| 422 |
-
Switching TO
|
| 423 |
-
|
| 424 |
"""
|
| 425 |
agent_session = await _check_session_access(session_id, user, request)
|
| 426 |
model_id = body.get("model")
|
|
@@ -429,7 +456,7 @@ async def set_session_model(
|
|
| 429 |
valid_ids = {m["id"] for m in AVAILABLE_MODELS}
|
| 430 |
if model_id not in valid_ids:
|
| 431 |
raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
|
| 432 |
-
await
|
| 433 |
if not agent_session:
|
| 434 |
raise HTTPException(status_code=404, detail="Session not found")
|
| 435 |
await session_manager.update_session_model(session_id, model_id)
|
|
@@ -463,15 +490,16 @@ async def set_session_notifications(
|
|
| 463 |
|
| 464 |
@router.get("/user/quota")
|
| 465 |
async def get_user_quota(user: dict = Depends(get_current_user)) -> dict:
|
| 466 |
-
"""Return the user's plan tier and today's
|
| 467 |
plan = user.get("plan", "free")
|
| 468 |
used = await user_quotas.get_claude_used_today(user["user_id"])
|
| 469 |
cap = user_quotas.daily_cap_for(plan)
|
|
|
|
| 470 |
return {
|
| 471 |
"plan": plan,
|
| 472 |
-
"
|
| 473 |
-
"
|
| 474 |
-
"
|
| 475 |
}
|
| 476 |
|
| 477 |
|
|
@@ -518,7 +546,7 @@ async def submit_input(
|
|
| 518 |
) -> dict:
|
| 519 |
"""Submit user input to a session. Only accessible by the session owner."""
|
| 520 |
agent_session = await _check_session_access(request.session_id, user)
|
| 521 |
-
await
|
| 522 |
success = await session_manager.submit_user_input(request.session_id, request.text)
|
| 523 |
if not success:
|
| 524 |
raise HTTPException(status_code=404, detail="Session not found or inactive")
|
|
@@ -570,12 +598,12 @@ async def chat_sse(
|
|
| 570 |
text = body.get("text")
|
| 571 |
approvals = body.get("approvals")
|
| 572 |
|
| 573 |
-
# Gate user-message sends against the daily
|
| 574 |
# continuations of an in-progress turn — the session was already charged
|
| 575 |
# on its first message, so we skip the gate there.
|
| 576 |
if text is not None and not approvals:
|
| 577 |
try:
|
| 578 |
-
await
|
| 579 |
except HTTPException:
|
| 580 |
broadcaster.unsubscribe(sub_id)
|
| 581 |
raise
|
|
|
|
| 41 |
|
| 42 |
router = APIRouter(prefix="/api", tags=["agent"])
|
| 43 |
|
| 44 |
+
DEFAULT_CLAUDE_MODEL_ID = "bedrock/us.anthropic.claude-opus-4-6-v1"
|
| 45 |
+
GATED_MODEL_IDS = {
|
| 46 |
+
DEFAULT_CLAUDE_MODEL_ID,
|
| 47 |
+
"openai/gpt-5.5",
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def _claude_picker_model_id() -> str:
|
| 52 |
+
"""Return the model ID used by the Claude option in the UI.
|
| 53 |
+
|
| 54 |
+
The frontend config sets ``session_manager.config.model_name`` from
|
| 55 |
+
``ML_INTERN_CLAUDE_MODEL_ID`` when that env var is present, otherwise it
|
| 56 |
+
falls back to the production Bedrock Claude model. This function only
|
| 57 |
+
exposes that resolved config value for the Claude picker; non-Claude models
|
| 58 |
+
are listed separately in the model switcher.
|
| 59 |
+
"""
|
| 60 |
+
return session_manager.config.model_name
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _available_models() -> list[dict[str, Any]]:
|
| 64 |
+
models = [
|
| 65 |
+
{
|
| 66 |
+
"id": "moonshotai/Kimi-K2.6",
|
| 67 |
+
"label": "Kimi K2.6",
|
| 68 |
+
"provider": "huggingface",
|
| 69 |
+
"tier": "free",
|
| 70 |
+
"recommended": True,
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"id": _claude_picker_model_id(),
|
| 74 |
+
"label": "Claude Opus 4.6",
|
| 75 |
+
"provider": "anthropic",
|
| 76 |
+
"tier": "pro",
|
| 77 |
+
"recommended": True,
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"id": "openai/gpt-5.5",
|
| 81 |
+
"label": "GPT-5.5",
|
| 82 |
+
"provider": "openai",
|
| 83 |
+
"tier": "pro",
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"id": "MiniMaxAI/MiniMax-M2.7",
|
| 87 |
+
"label": "MiniMax M2.7",
|
| 88 |
+
"provider": "huggingface",
|
| 89 |
+
"tier": "free",
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"id": "zai-org/GLM-5.1",
|
| 93 |
+
"label": "GLM 5.1",
|
| 94 |
+
"provider": "huggingface",
|
| 95 |
+
"tier": "free",
|
| 96 |
+
},
|
| 97 |
+
]
|
| 98 |
+
return models
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
AVAILABLE_MODELS = _available_models()
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def _is_gated_model(model_id: str) -> bool:
|
| 105 |
+
return model_id in GATED_MODEL_IDS
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
async def _require_hf_for_gated_model(request: Request, model_id: str) -> None:
|
| 109 |
+
"""403 if a non-``huggingface``-org user tries to select a gated model.
|
| 110 |
+
|
| 111 |
+
Gated models are deployed paid endpoints backed by service-owned
|
| 112 |
+
credentials. The gate only fires for deployed paid models so non-HF users
|
| 113 |
+
can still freely switch between the free models.
|
| 114 |
"""
|
| 115 |
+
if not _is_gated_model(model_id):
|
| 116 |
return
|
| 117 |
if not await require_huggingface_org_member(request):
|
| 118 |
raise HTTPException(
|
| 119 |
status_code=403,
|
| 120 |
detail={
|
| 121 |
+
"error": "premium_model_restricted",
|
| 122 |
"message": (
|
| 123 |
+
"Premium models are gated to HF staff. Pick a free model — "
|
| 124 |
"Kimi K2.6, MiniMax M2.7, or GLM 5.1 — instead."
|
| 125 |
),
|
| 126 |
},
|
| 127 |
)
|
| 128 |
|
| 129 |
|
| 130 |
+
async def _enforce_gated_model_quota(
|
| 131 |
user: dict[str, Any],
|
| 132 |
agent_session: AgentSession,
|
| 133 |
) -> None:
|
| 134 |
+
"""Charge the user's daily gated-model quota on first use in a session.
|
| 135 |
|
| 136 |
Runs at *message-submit* time, not session-create time — so spinning up a
|
| 137 |
+
gated-model session to look around doesn't burn quota. The
|
| 138 |
+
``claude_counted`` flag on ``AgentSession`` guards against re-counting the
|
| 139 |
+
same session; the stored field name is kept for persistence compatibility.
|
| 140 |
|
| 141 |
+
No-ops when the session's current model isn't gated, or when this
|
| 142 |
session has already been charged. Raises 429 when the user has hit
|
| 143 |
their daily cap.
|
| 144 |
"""
|
| 145 |
if agent_session.claude_counted:
|
| 146 |
return
|
| 147 |
model_name = agent_session.session.config.model_name
|
| 148 |
+
if not _is_gated_model(model_name):
|
| 149 |
return
|
| 150 |
user_id = user["user_id"]
|
| 151 |
cap = user_quotas.daily_cap_for(user.get("plan"))
|
|
|
|
| 154 |
raise HTTPException(
|
| 155 |
status_code=429,
|
| 156 |
detail={
|
| 157 |
+
"error": "premium_model_daily_cap",
|
| 158 |
"plan": user.get("plan", "free"),
|
| 159 |
"cap": cap,
|
| 160 |
"message": (
|
| 161 |
+
"Daily premium model limit reached. Upgrade to HF Pro for "
|
| 162 |
f"{user_quotas.CLAUDE_PRO_DAILY}/day or use a free model."
|
| 163 |
),
|
| 164 |
},
|
|
|
|
| 334 |
behalf of the user.
|
| 335 |
|
| 336 |
Optional body ``{"model"?: <id>}`` selects the session's LLM; unknown
|
| 337 |
+
ids are rejected (400). The gated-model quota runs at message-submit
|
| 338 |
+
time, not here — spinning up a session to look around is free.
|
| 339 |
|
| 340 |
Returns 503 if the server or user has reached the session limit.
|
| 341 |
"""
|
|
|
|
| 355 |
if model and model not in valid_ids:
|
| 356 |
raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
|
| 357 |
|
| 358 |
+
# Deployed paid models are gated to HF staff; free and local-dev models pass through.
|
|
|
|
| 359 |
resolved_model = model or session_manager.config.model_name
|
| 360 |
+
await _require_hf_for_gated_model(request, resolved_model)
|
| 361 |
|
| 362 |
try:
|
| 363 |
session_id = await session_manager.create_session(
|
|
|
|
| 382 |
session's context as a user-role system note.
|
| 383 |
|
| 384 |
Optional ``"model"`` in the body overrides the session's LLM. The
|
| 385 |
+
gated-model quota runs at message-submit time, not here.
|
| 386 |
"""
|
| 387 |
messages = body.get("messages")
|
| 388 |
if not isinstance(messages, list) or not messages:
|
|
|
|
| 396 |
raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
|
| 397 |
|
| 398 |
resolved_model = model or session_manager.config.model_name
|
| 399 |
+
await _require_hf_for_gated_model(request, resolved_model)
|
| 400 |
|
| 401 |
try:
|
| 402 |
session_id = await session_manager.create_session(
|
|
|
|
| 444 |
|
| 445 |
Takes effect on the next LLM call in that session — other sessions
|
| 446 |
(including other browser tabs) are unaffected. Model switches don't
|
| 447 |
+
charge quota — the gated-model quota only fires at message-submit time.
|
| 448 |
|
| 449 |
+
Switching TO a gated deployed model requires HF org membership; free-model
|
| 450 |
+
and local-dev direct provider switches are unrestricted.
|
| 451 |
"""
|
| 452 |
agent_session = await _check_session_access(session_id, user, request)
|
| 453 |
model_id = body.get("model")
|
|
|
|
| 456 |
valid_ids = {m["id"] for m in AVAILABLE_MODELS}
|
| 457 |
if model_id not in valid_ids:
|
| 458 |
raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
|
| 459 |
+
await _require_hf_for_gated_model(request, model_id)
|
| 460 |
if not agent_session:
|
| 461 |
raise HTTPException(status_code=404, detail="Session not found")
|
| 462 |
await session_manager.update_session_model(session_id, model_id)
|
|
|
|
| 490 |
|
| 491 |
@router.get("/user/quota")
|
| 492 |
async def get_user_quota(user: dict = Depends(get_current_user)) -> dict:
|
| 493 |
+
"""Return the user's plan tier and today's premium-model quota state."""
|
| 494 |
plan = user.get("plan", "free")
|
| 495 |
used = await user_quotas.get_claude_used_today(user["user_id"])
|
| 496 |
cap = user_quotas.daily_cap_for(plan)
|
| 497 |
+
remaining = max(0, cap - used)
|
| 498 |
return {
|
| 499 |
"plan": plan,
|
| 500 |
+
"premium_used_today": used,
|
| 501 |
+
"premium_daily_cap": cap,
|
| 502 |
+
"premium_remaining": remaining,
|
| 503 |
}
|
| 504 |
|
| 505 |
|
|
|
|
| 546 |
) -> dict:
|
| 547 |
"""Submit user input to a session. Only accessible by the session owner."""
|
| 548 |
agent_session = await _check_session_access(request.session_id, user)
|
| 549 |
+
await _enforce_gated_model_quota(user, agent_session)
|
| 550 |
success = await session_manager.submit_user_input(request.session_id, request.text)
|
| 551 |
if not success:
|
| 552 |
raise HTTPException(status_code=404, detail="Session not found or inactive")
|
|
|
|
| 598 |
text = body.get("text")
|
| 599 |
approvals = body.get("approvals")
|
| 600 |
|
| 601 |
+
# Gate user-message sends against the daily gated-model quota. Approvals are
|
| 602 |
# continuations of an in-progress turn — the session was already charged
|
| 603 |
# on its first message, so we skip the gate there.
|
| 604 |
if text is not None and not approvals:
|
| 605 |
try:
|
| 606 |
+
await _enforce_gated_model_quota(user, agent_session)
|
| 607 |
except HTTPException:
|
| 608 |
broadcaster.unsubscribe(sub_id)
|
| 609 |
raise
|
backend/user_quotas.py
CHANGED
|
@@ -1,12 +1,15 @@
|
|
| 1 |
-
"""Daily quota for
|
| 2 |
|
| 3 |
-
Tracks per-user
|
| 4 |
-
user's HF plan. MongoDB is the source of truth when configured; the
|
| 5 |
in-process dict remains the fallback for local/dev/test runs.
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
Cap tiers:
|
| 12 |
free user → CLAUDE_FREE_DAILY (1)
|
|
|
|
| 1 |
+
"""Daily quota for premium model session creations.
|
| 2 |
|
| 3 |
+
Tracks per-user premium model session starts against a daily cap derived from
|
| 4 |
+
the user's HF plan. MongoDB is the source of truth when configured; the
|
| 5 |
in-process dict remains the fallback for local/dev/test runs.
|
| 6 |
|
| 7 |
+
The public names still say ``claude`` because this quota bucket originally
|
| 8 |
+
only covered Claude and the persisted session field uses that name.
|
| 9 |
+
|
| 10 |
+
Unit: sessions, not messages — charged on a session's first premium-model send, not when the session is created. A user who sends with a premium model
|
| 11 |
+
in a new session consumes one quota point; switching an already-counted session
|
| 12 |
+
back to a premium model doesn't (`AgentSession.claude_counted` guards that).
|
| 13 |
|
| 14 |
Cap tiers:
|
| 15 |
free user → CLAUDE_FREE_DAILY (1)
|
configs/frontend_agent_config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"model_name": "bedrock/us.anthropic.claude-opus-4-6-v1",
|
| 3 |
"save_sessions": true,
|
| 4 |
"session_dataset_repo": "smolagents/ml-intern-sessions",
|
| 5 |
"yolo_mode": false,
|
|
|
|
| 1 |
{
|
| 2 |
+
"model_name": "${ML_INTERN_CLAUDE_MODEL_ID:-bedrock/us.anthropic.claude-opus-4-6-v1}",
|
| 3 |
"save_sessions": true,
|
| 4 |
"session_dataset_repo": "smolagents/ml-intern-sessions",
|
| 5 |
"yolo_mode": false,
|
frontend/src/components/Chat/ChatInput.tsx
CHANGED
|
@@ -8,7 +8,13 @@ import { useUserQuota } from '@/hooks/useUserQuota';
|
|
| 8 |
import ClaudeCapDialog from '@/components/ClaudeCapDialog';
|
| 9 |
import JobsUpgradeDialog from '@/components/JobsUpgradeDialog';
|
| 10 |
import { useAgentStore } from '@/store/agentStore';
|
| 11 |
-
import {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
// Model configuration
|
| 14 |
interface ModelOption {
|
|
@@ -25,7 +31,7 @@ const getHfAvatarUrl = (modelId: string) => {
|
|
| 25 |
return `https://huggingface.co/api/avatars/${org}`;
|
| 26 |
};
|
| 27 |
|
| 28 |
-
const
|
| 29 |
{
|
| 30 |
id: 'kimi-k2.6',
|
| 31 |
name: 'Kimi K2.6',
|
|
@@ -42,6 +48,13 @@ const MODEL_OPTIONS: ModelOption[] = [
|
|
| 42 |
avatarUrl: 'https://huggingface.co/api/avatars/Anthropic',
|
| 43 |
recommended: true,
|
| 44 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
{
|
| 46 |
id: 'minimax-m2.7',
|
| 47 |
name: 'MiniMax M2.7',
|
|
@@ -58,8 +71,8 @@ const MODEL_OPTIONS: ModelOption[] = [
|
|
| 58 |
},
|
| 59 |
];
|
| 60 |
|
| 61 |
-
const findModelByPath = (path: string): ModelOption | undefined => {
|
| 62 |
-
return
|
| 63 |
};
|
| 64 |
|
| 65 |
interface ChatInputProps {
|
|
@@ -72,16 +85,20 @@ interface ChatInputProps {
|
|
| 72 |
}
|
| 73 |
|
| 74 |
const isClaudeModel = (m: ModelOption) => isClaudePath(m.modelPath);
|
| 75 |
-
const
|
|
|
|
| 76 |
|
| 77 |
export default function ChatInput({ sessionId, onSend, onStop, isProcessing = false, disabled = false, placeholder = 'Ask anything...' }: ChatInputProps) {
|
| 78 |
const [input, setInput] = useState('');
|
| 79 |
const inputRef = useRef<HTMLTextAreaElement>(null);
|
| 80 |
-
const [
|
|
|
|
|
|
|
|
|
|
| 81 |
const [modelAnchorEl, setModelAnchorEl] = useState<null | HTMLElement>(null);
|
| 82 |
const { quota, refresh: refreshQuota } = useUserQuota();
|
| 83 |
// The daily-cap dialog is triggered from two places: (a) a 429 returned
|
| 84 |
-
// from the chat transport when the user tries to send on
|
| 85 |
// surfaced via the agent-store flag — and (b) nothing else right now
|
| 86 |
// (switching models is free). Keeping the open state in the store means
|
| 87 |
// the hook layer can flip it without threading props through.
|
|
@@ -92,6 +109,41 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
|
|
| 92 |
const [awaitingTopUp, setAwaitingTopUp] = useState(false);
|
| 93 |
const lastSentRef = useRef<string>('');
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
// Model is per-session: fetch this tab's current model every time the
|
| 96 |
// session changes. Other tabs keep their own selections independently.
|
| 97 |
useEffect(() => {
|
|
@@ -102,7 +154,7 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
|
|
| 102 |
.then((data) => {
|
| 103 |
if (cancelled) return;
|
| 104 |
if (data?.model) {
|
| 105 |
-
const model = findModelByPath(data.model);
|
| 106 |
if (model) setSelectedModelId(model.id);
|
| 107 |
}
|
| 108 |
})
|
|
@@ -110,7 +162,7 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
|
|
| 110 |
return () => { cancelled = true; };
|
| 111 |
}, [sessionId]);
|
| 112 |
|
| 113 |
-
const selectedModel =
|
| 114 |
|
| 115 |
// Auto-focus the textarea when the session becomes ready
|
| 116 |
useEffect(() => {
|
|
@@ -127,7 +179,7 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
|
|
| 127 |
}
|
| 128 |
}, [input, disabled, onSend]);
|
| 129 |
|
| 130 |
-
// When the chat transport reports a
|
| 131 |
// text so the user doesn't lose their message.
|
| 132 |
useEffect(() => {
|
| 133 |
if (claudeQuotaExhausted && lastSentRef.current) {
|
|
@@ -178,12 +230,12 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
|
|
| 178 |
}, [setClaudeQuotaExhausted]);
|
| 179 |
|
| 180 |
// "Use a free model" — switch the current session to Kimi (or the first
|
| 181 |
-
// non-
|
| 182 |
const handleUseFreeModel = useCallback(async () => {
|
| 183 |
setClaudeQuotaExhausted(false);
|
| 184 |
if (!sessionId) return;
|
| 185 |
-
const free =
|
| 186 |
-
?? firstFreeModel();
|
| 187 |
try {
|
| 188 |
const res = await apiFetch(`/api/session/${sessionId}/model`, {
|
| 189 |
method: 'POST',
|
|
@@ -199,14 +251,14 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
|
|
| 199 |
}
|
| 200 |
}
|
| 201 |
} catch { /* ignore */ }
|
| 202 |
-
}, [sessionId, onSend, setClaudeQuotaExhausted]);
|
| 203 |
|
| 204 |
-
const
|
| 205 |
if (!sessionId) return;
|
| 206 |
try {
|
| 207 |
await apiFetch(`/api/pro-click/${sessionId}`, {
|
| 208 |
method: 'POST',
|
| 209 |
-
body: JSON.stringify({ source: '
|
| 210 |
});
|
| 211 |
} catch {
|
| 212 |
/* tracking is best-effort */
|
|
@@ -254,14 +306,14 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
|
|
| 254 |
return () => document.removeEventListener('visibilitychange', onVisible);
|
| 255 |
}, [awaitingTopUp, jobsUpgradeRequired, handleJobsRetry]);
|
| 256 |
|
| 257 |
-
// Hide the chip until the user has actually burned quota
|
| 258 |
-
//
|
| 259 |
-
const
|
| 260 |
-
if (!quota || quota.
|
| 261 |
if (quota.plan === 'free') {
|
| 262 |
-
return quota.
|
| 263 |
}
|
| 264 |
-
return `${quota.
|
| 265 |
})();
|
| 266 |
|
| 267 |
return (
|
|
@@ -426,7 +478,7 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
|
|
| 426 |
}
|
| 427 |
}}
|
| 428 |
>
|
| 429 |
-
{
|
| 430 |
<MenuItem
|
| 431 |
key={model.id}
|
| 432 |
onClick={() => handleSelectModel(model)}
|
|
@@ -462,9 +514,9 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
|
|
| 462 |
}}
|
| 463 |
/>
|
| 464 |
)}
|
| 465 |
-
{
|
| 466 |
<Chip
|
| 467 |
-
label={
|
| 468 |
size="small"
|
| 469 |
sx={{
|
| 470 |
height: '18px',
|
|
@@ -489,10 +541,10 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
|
|
| 489 |
<ClaudeCapDialog
|
| 490 |
open={claudeQuotaExhausted}
|
| 491 |
plan={quota?.plan ?? 'free'}
|
| 492 |
-
cap={quota?.
|
| 493 |
onClose={handleCapDialogClose}
|
| 494 |
onUseFreeModel={handleUseFreeModel}
|
| 495 |
-
onUpgrade={
|
| 496 |
/>
|
| 497 |
<JobsUpgradeDialog
|
| 498 |
open={!!jobsUpgradeRequired}
|
|
|
|
| 8 |
import ClaudeCapDialog from '@/components/ClaudeCapDialog';
|
| 9 |
import JobsUpgradeDialog from '@/components/JobsUpgradeDialog';
|
| 10 |
import { useAgentStore } from '@/store/agentStore';
|
| 11 |
+
import {
|
| 12 |
+
CLAUDE_MODEL_PATH,
|
| 13 |
+
FIRST_FREE_MODEL_PATH,
|
| 14 |
+
GPT_55_MODEL_PATH,
|
| 15 |
+
isClaudePath,
|
| 16 |
+
isPremiumPath,
|
| 17 |
+
} from '@/utils/model';
|
| 18 |
|
| 19 |
// Model configuration
|
| 20 |
interface ModelOption {
|
|
|
|
| 31 |
return `https://huggingface.co/api/avatars/${org}`;
|
| 32 |
};
|
| 33 |
|
| 34 |
+
const DEFAULT_MODEL_OPTIONS: ModelOption[] = [
|
| 35 |
{
|
| 36 |
id: 'kimi-k2.6',
|
| 37 |
name: 'Kimi K2.6',
|
|
|
|
| 48 |
avatarUrl: 'https://huggingface.co/api/avatars/Anthropic',
|
| 49 |
recommended: true,
|
| 50 |
},
|
| 51 |
+
{
|
| 52 |
+
id: 'gpt-5.5',
|
| 53 |
+
name: 'GPT-5.5',
|
| 54 |
+
description: 'OpenAI',
|
| 55 |
+
modelPath: GPT_55_MODEL_PATH,
|
| 56 |
+
avatarUrl: 'https://huggingface.co/api/avatars/openai',
|
| 57 |
+
},
|
| 58 |
{
|
| 59 |
id: 'minimax-m2.7',
|
| 60 |
name: 'MiniMax M2.7',
|
|
|
|
| 71 |
},
|
| 72 |
];
|
| 73 |
|
| 74 |
+
const findModelByPath = (path: string, options: ModelOption[]): ModelOption | undefined => {
|
| 75 |
+
return options.find(m => m.modelPath === path || path?.includes(m.id));
|
| 76 |
};
|
| 77 |
|
| 78 |
interface ChatInputProps {
|
|
|
|
| 85 |
}
|
| 86 |
|
| 87 |
const isClaudeModel = (m: ModelOption) => isClaudePath(m.modelPath);
|
| 88 |
+
const isPremiumModel = (m: ModelOption) => isPremiumPath(m.modelPath);
|
| 89 |
+
const firstFreeModel = (options: ModelOption[]) => options.find(m => !isPremiumModel(m)) ?? options[0];
|
| 90 |
|
| 91 |
export default function ChatInput({ sessionId, onSend, onStop, isProcessing = false, disabled = false, placeholder = 'Ask anything...' }: ChatInputProps) {
|
| 92 |
const [input, setInput] = useState('');
|
| 93 |
const inputRef = useRef<HTMLTextAreaElement>(null);
|
| 94 |
+
const [modelOptions, setModelOptions] = useState<ModelOption[]>(DEFAULT_MODEL_OPTIONS);
|
| 95 |
+
const modelOptionsRef = useRef<ModelOption[]>(DEFAULT_MODEL_OPTIONS);
|
| 96 |
+
const sessionIdRef = useRef<string | undefined>(sessionId);
|
| 97 |
+
const [selectedModelId, setSelectedModelId] = useState<string>(DEFAULT_MODEL_OPTIONS[0].id);
|
| 98 |
const [modelAnchorEl, setModelAnchorEl] = useState<null | HTMLElement>(null);
|
| 99 |
const { quota, refresh: refreshQuota } = useUserQuota();
|
| 100 |
// The daily-cap dialog is triggered from two places: (a) a 429 returned
|
| 101 |
+
// from the chat transport when the user tries to send on a premium model over cap —
|
| 102 |
// surfaced via the agent-store flag — and (b) nothing else right now
|
| 103 |
// (switching models is free). Keeping the open state in the store means
|
| 104 |
// the hook layer can flip it without threading props through.
|
|
|
|
| 109 |
const [awaitingTopUp, setAwaitingTopUp] = useState(false);
|
| 110 |
const lastSentRef = useRef<string>('');
|
| 111 |
|
| 112 |
+
useEffect(() => {
|
| 113 |
+
modelOptionsRef.current = modelOptions;
|
| 114 |
+
}, [modelOptions]);
|
| 115 |
+
|
| 116 |
+
useEffect(() => {
|
| 117 |
+
sessionIdRef.current = sessionId;
|
| 118 |
+
}, [sessionId]);
|
| 119 |
+
|
| 120 |
+
useEffect(() => {
|
| 121 |
+
let cancelled = false;
|
| 122 |
+
apiFetch('/api/config/model')
|
| 123 |
+
.then((res) => (res.ok ? res.json() : null))
|
| 124 |
+
.then((data) => {
|
| 125 |
+
if (cancelled || !data?.available) return;
|
| 126 |
+
const claude = data.available.find((m: { provider?: string; id?: string }) => (
|
| 127 |
+
m.provider === 'anthropic' && m.id
|
| 128 |
+
));
|
| 129 |
+
if (!claude?.id) return;
|
| 130 |
+
|
| 131 |
+
const next = DEFAULT_MODEL_OPTIONS.map((option) => (
|
| 132 |
+
isClaudeModel(option)
|
| 133 |
+
? { ...option, modelPath: claude.id, name: claude.label ?? option.name }
|
| 134 |
+
: option
|
| 135 |
+
));
|
| 136 |
+
modelOptionsRef.current = next;
|
| 137 |
+
setModelOptions(next);
|
| 138 |
+
if (!sessionIdRef.current) {
|
| 139 |
+
const current = data.current ? findModelByPath(data.current, next) : null;
|
| 140 |
+
if (current) setSelectedModelId(current.id);
|
| 141 |
+
}
|
| 142 |
+
})
|
| 143 |
+
.catch(() => { /* ignore */ });
|
| 144 |
+
return () => { cancelled = true; };
|
| 145 |
+
}, []);
|
| 146 |
+
|
| 147 |
// Model is per-session: fetch this tab's current model every time the
|
| 148 |
// session changes. Other tabs keep their own selections independently.
|
| 149 |
useEffect(() => {
|
|
|
|
| 154 |
.then((data) => {
|
| 155 |
if (cancelled) return;
|
| 156 |
if (data?.model) {
|
| 157 |
+
const model = findModelByPath(data.model, modelOptionsRef.current);
|
| 158 |
if (model) setSelectedModelId(model.id);
|
| 159 |
}
|
| 160 |
})
|
|
|
|
| 162 |
return () => { cancelled = true; };
|
| 163 |
}, [sessionId]);
|
| 164 |
|
| 165 |
+
const selectedModel = modelOptions.find(m => m.id === selectedModelId) || modelOptions[0];
|
| 166 |
|
| 167 |
// Auto-focus the textarea when the session becomes ready
|
| 168 |
useEffect(() => {
|
|
|
|
| 179 |
}
|
| 180 |
}, [input, disabled, onSend]);
|
| 181 |
|
| 182 |
+
// When the chat transport reports a premium-model quota 429, restore the typed
|
| 183 |
// text so the user doesn't lose their message.
|
| 184 |
useEffect(() => {
|
| 185 |
if (claudeQuotaExhausted && lastSentRef.current) {
|
|
|
|
| 230 |
}, [setClaudeQuotaExhausted]);
|
| 231 |
|
| 232 |
// "Use a free model" — switch the current session to Kimi (or the first
|
| 233 |
+
// non-premium option) and auto-retry the send that tripped the cap.
|
| 234 |
const handleUseFreeModel = useCallback(async () => {
|
| 235 |
setClaudeQuotaExhausted(false);
|
| 236 |
if (!sessionId) return;
|
| 237 |
+
const free = modelOptions.find(m => m.modelPath === FIRST_FREE_MODEL_PATH)
|
| 238 |
+
?? firstFreeModel(modelOptions);
|
| 239 |
try {
|
| 240 |
const res = await apiFetch(`/api/session/${sessionId}/model`, {
|
| 241 |
method: 'POST',
|
|
|
|
| 251 |
}
|
| 252 |
}
|
| 253 |
} catch { /* ignore */ }
|
| 254 |
+
}, [sessionId, onSend, setClaudeQuotaExhausted, modelOptions]);
|
| 255 |
|
| 256 |
+
const handlePremiumUpgradeClick = useCallback(async () => {
|
| 257 |
if (!sessionId) return;
|
| 258 |
try {
|
| 259 |
await apiFetch(`/api/pro-click/${sessionId}`, {
|
| 260 |
method: 'POST',
|
| 261 |
+
body: JSON.stringify({ source: 'premium_cap_dialog', target: 'pro_pricing' }),
|
| 262 |
});
|
| 263 |
} catch {
|
| 264 |
/* tracking is best-effort */
|
|
|
|
| 306 |
return () => document.removeEventListener('visibilitychange', onVisible);
|
| 307 |
}, [awaitingTopUp, jobsUpgradeRequired, handleJobsRetry]);
|
| 308 |
|
| 309 |
+
// Label for the quota chip shown next to premium models, or null to hide it.
// The chip stays hidden until the user has actually burned quota; opening a
// premium-model session without sending should not populate a counter.
const premiumChip = (() => {
  if (!quota) return null;
  if (quota.premiumUsedToday === 0) return null;
  // Non-free plans see a used/cap counter.
  if (quota.plan !== 'free') {
    return `${quota.premiumUsedToday}/${quota.premiumDailyCap} today`;
  }
  // Free plan: only say whether any premium allowance is left.
  return quota.premiumRemaining > 0 ? 'Free today' : 'Pro only';
})();
|
| 318 |
|
| 319 |
return (
|
|
|
|
| 478 |
}
|
| 479 |
}}
|
| 480 |
>
|
| 481 |
+
{modelOptions.map((model) => (
|
| 482 |
<MenuItem
|
| 483 |
key={model.id}
|
| 484 |
onClick={() => handleSelectModel(model)}
|
|
|
|
| 514 |
}}
|
| 515 |
/>
|
| 516 |
)}
|
| 517 |
+
{isPremiumModel(model) && premiumChip && (
|
| 518 |
<Chip
|
| 519 |
+
label={premiumChip}
|
| 520 |
size="small"
|
| 521 |
sx={{
|
| 522 |
height: '18px',
|
|
|
|
| 541 |
<ClaudeCapDialog
|
| 542 |
open={claudeQuotaExhausted}
|
| 543 |
plan={quota?.plan ?? 'free'}
|
| 544 |
+
cap={quota?.premiumDailyCap ?? 1}
|
| 545 |
onClose={handleCapDialogClose}
|
| 546 |
onUseFreeModel={handleUseFreeModel}
|
| 547 |
+
onUpgrade={handlePremiumUpgradeClick}
|
| 548 |
/>
|
| 549 |
<JobsUpgradeDialog
|
| 550 |
open={!!jobsUpgradeRequired}
|
frontend/src/components/ClaudeCapDialog.tsx
CHANGED
|
@@ -55,15 +55,15 @@ export default function ClaudeCapDialog({
|
|
| 55 |
<DialogTitle
|
| 56 |
sx={{ color: 'var(--text)', fontWeight: 700, fontSize: '1rem', pt: 2.5, pb: 0, px: 3 }}
|
| 57 |
>
|
| 58 |
-
You've hit your
|
| 59 |
</DialogTitle>
|
| 60 |
<DialogContent sx={{ px: 3, pt: 1.25, pb: 0 }}>
|
| 61 |
<DialogContentText
|
| 62 |
sx={{ color: 'var(--muted-text)', fontSize: '0.85rem', lineHeight: 1.6 }}
|
| 63 |
>
|
| 64 |
-
Opus
|
| 65 |
-
{cap === 1 ? 'session' : 'sessions'} a day. Give Kimi, MiniMax, or GLM a spin
|
| 66 |
-
|
| 67 |
</DialogContentText>
|
| 68 |
<Box
|
| 69 |
sx={{
|
|
@@ -85,14 +85,14 @@ export default function ClaudeCapDialog({
|
|
| 85 |
letterSpacing: '0.02em',
|
| 86 |
}}
|
| 87 |
>
|
| 88 |
-
HF Pro ($9/mo) — more
|
| 89 |
</Typography>
|
| 90 |
<Typography
|
| 91 |
variant="caption"
|
| 92 |
sx={{ display: 'block', color: 'var(--muted-text)', fontSize: '0.78rem', lineHeight: 1.55 }}
|
| 93 |
>
|
| 94 |
-
{PRO_CAP}
|
| 95 |
-
and priority on Spaces hardware.
|
| 96 |
</Typography>
|
| 97 |
</Box>
|
| 98 |
</DialogContent>
|
|
|
|
| 55 |
<DialogTitle
|
| 56 |
sx={{ color: 'var(--text)', fontWeight: 700, fontSize: '1rem', pt: 2.5, pb: 0, px: 3 }}
|
| 57 |
>
|
| 58 |
+
You've hit your premium model limit
|
| 59 |
</DialogTitle>
|
| 60 |
<DialogContent sx={{ px: 3, pt: 1.25, pb: 0 }}>
|
| 61 |
<DialogContentText
|
| 62 |
sx={{ color: 'var(--muted-text)', fontSize: '0.85rem', lineHeight: 1.6 }}
|
| 63 |
>
|
| 64 |
+
Opus and GPT-5.5 are expensive to run, so we cap premium models at {cap}{' '}
|
| 65 |
+
{cap === 1 ? 'session' : 'sessions'} a day. Give Kimi, MiniMax, or GLM a spin
|
| 66 |
+
instead.
|
| 67 |
</DialogContentText>
|
| 68 |
<Box
|
| 69 |
sx={{
|
|
|
|
| 85 |
letterSpacing: '0.02em',
|
| 86 |
}}
|
| 87 |
>
|
| 88 |
+
HF Pro ($9/mo) — more premium model sessions
|
| 89 |
</Typography>
|
| 90 |
<Typography
|
| 91 |
variant="caption"
|
| 92 |
sx={{ display: 'block', color: 'var(--muted-text)', fontSize: '0.78rem', lineHeight: 1.55 }}
|
| 93 |
>
|
| 94 |
+
{PRO_CAP} premium model sessions/day here, 20× HF Inference credits,
|
| 95 |
+
ZeroGPU access, and priority on Spaces hardware.
|
| 96 |
</Typography>
|
| 97 |
</Box>
|
| 98 |
</DialogContent>
|
frontend/src/hooks/useAgentChat.ts
CHANGED
|
@@ -346,7 +346,7 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
|
|
| 346 |
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
|
| 347 |
onError: (error) => {
|
| 348 |
updateSession(sessionId, { isProcessing: false });
|
| 349 |
-
//
|
| 350 |
// banner. Transport marks the error with this sentinel.
|
| 351 |
if (error.message === 'CLAUDE_QUOTA_EXHAUSTED') {
|
| 352 |
if (isActiveRef.current) {
|
|
|
|
| 346 |
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
|
| 347 |
onError: (error) => {
|
| 348 |
updateSession(sessionId, { isProcessing: false });
|
| 349 |
+
// Premium-model daily cap: open the cap dialog instead of the generic error
|
| 350 |
// banner. Transport marks the error with this sentinel.
|
| 351 |
if (error.message === 'CLAUDE_QUOTA_EXHAUSTED') {
|
| 352 |
if (isActiveRef.current) {
|
frontend/src/hooks/useUserQuota.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
/**
|
| 2 |
-
* Reads the current user's
|
| 3 |
*
|
| 4 |
* Fetches once when the user becomes authenticated, and exposes a `refresh()`
|
| 5 |
* that callers invoke after a successful session-create / model-switch so the
|
|
@@ -13,9 +13,9 @@ export type PlanTier = 'free' | 'pro' | 'org';
|
|
| 13 |
|
| 14 |
export interface UserQuota {
|
| 15 |
plan: PlanTier;
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
}
|
| 20 |
|
| 21 |
export function useUserQuota() {
|
|
@@ -32,9 +32,9 @@ export function useUserQuota() {
|
|
| 32 |
const data = await res.json();
|
| 33 |
setQuota({
|
| 34 |
plan: (data.plan ?? 'free') as PlanTier,
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
});
|
| 39 |
} catch {
|
| 40 |
/* backend unreachable — leave previous value */
|
|
|
|
| 1 |
/**
|
| 2 |
+
* Reads the current user's premium-model daily quota + plan tier from the backend.
|
| 3 |
*
|
| 4 |
* Fetches once when the user becomes authenticated, and exposes a `refresh()`
|
| 5 |
* that callers invoke after a successful session-create / model-switch so the
|
|
|
|
| 13 |
|
| 14 |
/** Plan tier plus premium-model daily quota numbers held in hook state. */
export interface UserQuota {
  // Plan tier; the fetch code falls back to 'free' when the backend omits it.
  plan: PlanTier;
  // Filled from `premium_used_today` in the backend response (0 when absent).
  premiumUsedToday: number;
  // Filled from `premium_daily_cap` in the backend response (1 when absent).
  premiumDailyCap: number;
  // Filled from `premium_remaining` in the backend response (0 when absent).
  premiumRemaining: number;
}
|
| 20 |
|
| 21 |
export function useUserQuota() {
|
|
|
|
| 32 |
const data = await res.json();
|
| 33 |
setQuota({
|
| 34 |
plan: (data.plan ?? 'free') as PlanTier,
|
| 35 |
+
premiumUsedToday: data.premium_used_today ?? 0,
|
| 36 |
+
premiumDailyCap: data.premium_daily_cap ?? 1,
|
| 37 |
+
premiumRemaining: data.premium_remaining ?? 0,
|
| 38 |
});
|
| 39 |
} catch {
|
| 40 |
/* backend unreachable — leave previous value */
|
frontend/src/lib/sse-chat-transport.ts
CHANGED
|
@@ -402,7 +402,7 @@ export class SSEChatTransport implements ChatTransport<UIMessage> {
|
|
| 402 |
this.sideChannel.onSessionDead(sessionId);
|
| 403 |
}
|
| 404 |
if (response.status === 429) {
|
| 405 |
-
//
|
| 406 |
// for useAgentChat's onError handler, which surfaces the cap dialog
|
| 407 |
// instead of a generic error banner.
|
| 408 |
throw new Error('CLAUDE_QUOTA_EXHAUSTED');
|
|
|
|
| 402 |
this.sideChannel.onSessionDead(sessionId);
|
| 403 |
}
|
| 404 |
if (response.status === 429) {
|
| 405 |
+
// Premium-model daily quota gate tripped. This sentinel message is the detection marker
|
| 406 |
// for useAgentChat's onError handler, which surfaces the cap dialog
|
| 407 |
// instead of a generic error banner.
|
| 408 |
throw new Error('CLAUDE_QUOTA_EXHAUSTED');
|
frontend/src/store/agentStore.ts
CHANGED
|
@@ -113,7 +113,7 @@ interface AgentStore {
|
|
| 113 |
user: User | null;
|
| 114 |
error: string | null;
|
| 115 |
llmHealthError: LLMHealthError | null;
|
| 116 |
-
/** Set when a
|
| 117 |
claudeQuotaExhausted: boolean;
|
| 118 |
jobsUpgradeRequired: JobsUpgradeState | null;
|
| 119 |
|
|
|
|
| 113 |
user: User | null;
|
| 114 |
error: string | null;
|
| 115 |
llmHealthError: LLMHealthError | null;
|
| 116 |
+
/** Set when a premium-model send hits the daily quota; ChatInput opens the cap dialog. */
|
| 117 |
claudeQuotaExhausted: boolean;
|
| 118 |
jobsUpgradeRequired: JobsUpgradeState | null;
|
| 119 |
|
frontend/src/utils/model.ts
CHANGED
|
@@ -1,14 +1,19 @@
|
|
| 1 |
/**
|
| 2 |
* Shared model-id constants used by session-create call sites and the
|
| 3 |
-
*
|
| 4 |
*
|
| 5 |
* Keep in sync with MODEL_OPTIONS in components/Chat/ChatInput.tsx and
|
| 6 |
* AVAILABLE_MODELS in backend/routes/agent.py.
|
| 7 |
*/
|
| 8 |
|
| 9 |
export const CLAUDE_MODEL_PATH = 'bedrock/us.anthropic.claude-opus-4-6-v1';
|
|
|
|
| 10 |
export const FIRST_FREE_MODEL_PATH = 'moonshotai/Kimi-K2.6';
|
| 11 |
|
| 12 |
/** True when `modelPath` is a non-empty string that contains "anthropic". */
export function isClaudePath(modelPath: string | undefined): boolean {
  if (!modelPath) return false;
  return modelPath.includes('anthropic');
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
/**
|
| 2 |
* Shared model-id constants used by session-create call sites and the
|
| 3 |
+
* premium-model cap dialog "Use a free model" escape hatch.
|
| 4 |
*
|
| 5 |
* Keep in sync with MODEL_OPTIONS in components/Chat/ChatInput.tsx and
|
| 6 |
* AVAILABLE_MODELS in backend/routes/agent.py.
|
| 7 |
*/
|
| 8 |
|
| 9 |
export const CLAUDE_MODEL_PATH = 'bedrock/us.anthropic.claude-opus-4-6-v1';
|
| 10 |
+
export const GPT_55_MODEL_PATH = 'openai/gpt-5.5';
|
| 11 |
export const FIRST_FREE_MODEL_PATH = 'moonshotai/Kimi-K2.6';
|
| 12 |
|
| 13 |
/** True when `modelPath` is a non-empty string that contains "anthropic". */
export function isClaudePath(modelPath: string | undefined): boolean {
  if (!modelPath) return false;
  return modelPath.includes('anthropic');
}
|
| 16 |
+
|
| 17 |
+
/**
 * True only when `modelPath` is exactly CLAUDE_MODEL_PATH or GPT_55_MODEL_PATH.
 * Any other value, including `undefined`, is not premium.
 */
export function isPremiumPath(modelPath: string | undefined): boolean {
  // `switch` compares with strict equality, matching the original `===` checks.
  switch (modelPath) {
    case CLAUDE_MODEL_PATH:
    case GPT_55_MODEL_PATH:
      return true;
    default:
      return false;
  }
}
|
tests/unit/test_agent_model_gating.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for gated model handling in backend/routes/agent.py."""
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from types import SimpleNamespace
|
| 6 |
+
|
| 7 |
+
import pytest
|
| 8 |
+
from fastapi import HTTPException
|
| 9 |
+
|
| 10 |
+
_BACKEND_DIR = Path(__file__).resolve().parent.parent.parent / "backend"
|
| 11 |
+
if str(_BACKEND_DIR) not in sys.path:
|
| 12 |
+
sys.path.insert(0, str(_BACKEND_DIR))
|
| 13 |
+
|
| 14 |
+
from routes import agent # noqa: E402
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@pytest.fixture(autouse=True)
def _reset_quota_store():
    """Reset the quota store before and after every test in this module."""
    reset = agent.user_quotas._reset_for_tests
    reset()  # start from a clean store
    yield
    reset()  # leave nothing behind for the next test
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def test_gated_model_predicate_includes_bedrock_claude_and_gpt55_only():
    """`_is_gated_model` accepts the Bedrock Claude and GPT-5.5 ids and nothing else here."""
    gated_ids = (
        "bedrock/us.anthropic.claude-opus-4-6-v1",
        "openai/gpt-5.5",
    )
    ungated_ids = (
        "anthropic/claude-opus-4-6",
        "moonshotai/Kimi-K2.6",
    )

    for model_id in gated_ids:
        assert agent._is_gated_model(model_id)
    for model_id in ungated_ids:
        assert not agent._is_gated_model(model_id)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
@pytest.mark.asyncio
async def test_gated_model_gate_rejects_gpt55_for_non_hf_user(monkeypatch):
    """GPT-5.5 is refused with a 403 when the org-membership check returns False."""

    async def deny_membership(_request):
        return False

    monkeypatch.setattr(agent, "require_huggingface_org_member", deny_membership)

    with pytest.raises(HTTPException) as raised:
        await agent._require_hf_for_gated_model(None, "openai/gpt-5.5")

    error = raised.value
    assert error.status_code == 403
    assert error.detail["error"] == "premium_model_restricted"
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
@pytest.mark.asyncio
async def test_ungated_models_skip_hf_membership_check(monkeypatch):
    """Ungated model ids pass the gate without the membership check being called."""

    async def membership_check_must_not_run(_request):
        raise AssertionError("ungated models must not require HF org membership")

    monkeypatch.setattr(
        agent,
        "require_huggingface_org_member",
        membership_check_must_not_run,
    )

    for model_id in ("moonshotai/Kimi-K2.6", "anthropic/claude-opus-4-6"):
        await agent._require_hf_for_gated_model(None, model_id)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@pytest.mark.asyncio
async def test_gated_quota_charges_gpt55(monkeypatch):
    """Enforcing quota on a GPT-5.5 session marks it counted, persists it once, and records one use."""
    snapshots = []

    async def record_snapshot(agent_session):
        snapshots.append(agent_session)

    monkeypatch.setattr(
        agent.session_manager, "persist_session_snapshot", record_snapshot
    )

    config = SimpleNamespace(model_name="openai/gpt-5.5")
    agent_session = SimpleNamespace(
        claude_counted=False,
        session=SimpleNamespace(config=config),
    )
    user = {"user_id": "u1", "plan": "free"}

    await agent._enforce_gated_model_quota(user, agent_session)

    assert agent_session.claude_counted is True
    assert snapshots == [agent_session]
    used_today = await agent.user_quotas.get_claude_used_today("u1")
    assert used_today == 1
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
@pytest.mark.asyncio
async def test_gated_quota_skips_direct_anthropic(monkeypatch):
    """A direct `anthropic/...` session is neither persisted nor charged against the quota."""

    async def persist_must_not_run(_agent_session):
        raise AssertionError("direct Anthropic should not consume deployed gated quota")

    monkeypatch.setattr(
        agent.session_manager, "persist_session_snapshot", persist_must_not_run
    )

    config = SimpleNamespace(model_name="anthropic/claude-opus-4-6")
    agent_session = SimpleNamespace(
        claude_counted=False,
        session=SimpleNamespace(config=config),
    )
    user = {"user_id": "u1", "plan": "free"}

    await agent._enforce_gated_model_quota(user, agent_session)

    assert agent_session.claude_counted is False
    used_today = await agent.user_quotas.get_claude_used_today("u1")
    assert used_today == 0
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
@pytest.mark.asyncio
async def test_user_quota_response_uses_premium_fields_only(monkeypatch):
    """The quota payload is exactly `plan` plus the three `premium_*` fields."""

    async def two_used_today(user_id):
        assert user_id == "u1"
        return 2

    def cap_of_five(plan):
        return 5

    monkeypatch.setattr(agent.user_quotas, "get_claude_used_today", two_used_today)
    monkeypatch.setattr(agent.user_quotas, "daily_cap_for", cap_of_five)

    response = await agent.get_user_quota({"user_id": "u1", "plan": "pro"})

    # With 2 used against a cap of 5, the endpoint is expected to report 3 remaining.
    expected = {
        "plan": "pro",
        "premium_used_today": 2,
        "premium_daily_cap": 5,
        "premium_remaining": 3,
    }
    assert response == expected
|