Guillaume Salou commited on
Commit
82bad21
·
unverified ·
1 Parent(s): e152e8b

quick wins, after internal audit (#229)

Browse files

* fix(api): cap /api/submit text to 100k chars

Pydantic-level guard on SubmitRequest.text. Prevents a single oversized
user message from bloating the conversation context for every subsequent
turn (cost-amplification on Bedrock-tier models). 100k chars ≈ 25k tokens,
well above any reasonable user prompt.

* chore(api): disable FastAPI auto-docs on HF Spaces

Gated on SPACE_ID env var, which is set by the HF Spaces runtime but not
in local dev. /docs, /redoc, /openapi.json return 404 on the deployed
Space; localhost keeps them for development convenience.

Reduces public information disclosure of the full API surface.

* fix(api): check session ownership before body validation on /truncate

POST /api/truncate/{session_id} previously returned 422 (with the schema
detail revealing 'user_message_index' is required) even when the caller
didn't own the session, leaking the route's expected payload shape to
unauthenticated probers.

Now parses the body manually after _check_session_access, so non-owners
get a 404 first.

* fix(api): apply ownership-before-validation to /submit; preserve FastAPI 422 schema

Address two P1 PR-bot findings:

1. /submit had the same ordering inversion as /truncate: with the new
min_length=1 / max_length=100_000 on SubmitRequest.text, an authenticated
non-owner sending {session_id: victim, text: ''} would get a 422 leaking
the constraint instead of a 404. Refactor to manual-parse pattern: pull
session_id, run _check_session_access, then validate the full body.

2. The previous /truncate manual-parse used 'except Exception: HTTPException'
which str()-ified Pydantic ValidationError into a multi-line blob and
broke FastAPI's standard 422 schema. Now catches ValidationError
specifically and re-raises as RequestValidationError so clients see the
normal structured detail list.

* style: ruff format

Apply ruff format on /submit RequestValidationError dict literal that
had been written on one line. No semantic change.

Files changed (3) hide show
  1. backend/main.py +8 -0
  2. backend/models.py +4 -1
  3. backend/routes/agent.py +47 -5
backend/main.py CHANGED
@@ -65,11 +65,19 @@ async def lifespan(app: FastAPI):
65
  await session_manager.close()
66
 
67
 
 
 
 
 
 
68
  app = FastAPI(
69
  title="HF Agent",
70
  description="ML Engineering Assistant API",
71
  version="1.0.0",
72
  lifespan=lifespan,
 
 
 
73
  )
74
 
75
  # CORS middleware for development
 
65
  await session_manager.close()
66
 
67
 
68
+ # Disable FastAPI auto-docs when running on HF Spaces (SPACE_ID is set by the
69
+ # platform) to avoid exposing the full API surface to anonymous visitors. Local
70
+ # dev keeps /docs and /redoc available.
71
+ _DOCS_DISABLED = os.environ.get("SPACE_ID") is not None
72
+
73
  app = FastAPI(
74
  title="HF Agent",
75
  description="ML Engineering Assistant API",
76
  version="1.0.0",
77
  lifespan=lifespan,
78
+ docs_url=None if _DOCS_DISABLED else "/docs",
79
+ redoc_url=None if _DOCS_DISABLED else "/redoc",
80
+ openapi_url=None if _DOCS_DISABLED else "/openapi.json",
81
  )
82
 
83
  # CORS middleware for development
backend/models.py CHANGED
@@ -52,7 +52,10 @@ class SubmitRequest(BaseModel):
52
  """Request to submit user input."""
53
 
54
  session_id: str
55
- text: str
 
 
 
56
 
57
 
58
  class TruncateRequest(BaseModel):
 
52
  """Request to submit user input."""
53
 
54
  session_id: str
55
+ # Cap text size to prevent context-bloat / cost-amplification: a malicious
56
+ # or runaway client could otherwise attach megabytes that then ride along
57
+ # in every subsequent turn until /api/compact is called.
58
+ text: str = Field(..., min_length=1, max_length=100_000)
59
 
60
 
61
  class TruncateRequest(BaseModel):
backend/routes/agent.py CHANGED
@@ -20,8 +20,10 @@ from fastapi import (
20
  HTTPException,
21
  Request,
22
  )
 
23
  from fastapi.responses import StreamingResponse
24
  from litellm import acompletion
 
25
  from models import (
26
  ApprovalRequest,
27
  HealthResponse,
@@ -654,15 +656,41 @@ async def delete_session(
654
 
655
  @router.post("/submit")
656
  async def submit_input(
657
- request: SubmitRequest, user: dict = Depends(get_current_user)
658
  ) -> dict:
659
  """Submit user input to a session. Only accessible by the session owner."""
660
- agent_session = await _check_session_access(request.session_id, user)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661
  await _enforce_gated_model_quota(user, agent_session)
662
- success = await session_manager.submit_user_input(request.session_id, request.text)
663
  if not success:
664
  raise HTTPException(status_code=404, detail="Session not found or inactive")
665
- return {"status": "submitted", "session_id": request.session_id}
666
 
667
 
668
  @router.post("/approve")
@@ -934,10 +962,24 @@ async def undo_session(session_id: str, user: dict = Depends(get_current_user))
934
 
935
  @router.post("/truncate/{session_id}")
936
  async def truncate_session(
937
- session_id: str, body: TruncateRequest, user: dict = Depends(get_current_user)
 
 
938
  ) -> dict:
939
  """Truncate conversation to before a specific user message."""
 
 
 
940
  await _check_session_access(session_id, user)
 
 
 
 
 
 
 
 
 
941
  success = await session_manager.truncate(session_id, body.user_message_index)
942
  if not success:
943
  raise HTTPException(
 
20
  HTTPException,
21
  Request,
22
  )
23
+ from fastapi.exceptions import RequestValidationError
24
  from fastapi.responses import StreamingResponse
25
  from litellm import acompletion
26
+ from pydantic import ValidationError
27
  from models import (
28
  ApprovalRequest,
29
  HealthResponse,
 
656
 
657
  @router.post("/submit")
658
  async def submit_input(
659
+ request: Request, user: dict = Depends(get_current_user)
660
  ) -> dict:
661
  """Submit user input to a session. Only accessible by the session owner."""
662
+ # Parse the body manually so session ownership can be checked before the
663
+ # text-length constraints fire — otherwise a non-owner sending an empty
664
+ # or oversized text gets a 422 leaking the constraint instead of the 404
665
+ # they'd get for any other access to a session they don't own.
666
+ try:
667
+ payload = await request.json()
668
+ except (json.JSONDecodeError, TypeError) as exc:
669
+ raise HTTPException(status_code=422, detail=str(exc))
670
+ if not isinstance(payload, dict):
671
+ raise HTTPException(status_code=422, detail="Body must be a JSON object")
672
+ raw_session_id = payload.get("session_id")
673
+ if not isinstance(raw_session_id, str) or not raw_session_id:
674
+ raise RequestValidationError(
675
+ [
676
+ {
677
+ "type": "missing",
678
+ "loc": ("body", "session_id"),
679
+ "msg": "Field required",
680
+ "input": payload,
681
+ }
682
+ ]
683
+ )
684
+ agent_session = await _check_session_access(raw_session_id, user)
685
+ try:
686
+ body = SubmitRequest(**payload)
687
+ except ValidationError as exc:
688
+ raise RequestValidationError(exc.errors()) from exc
689
  await _enforce_gated_model_quota(user, agent_session)
690
+ success = await session_manager.submit_user_input(body.session_id, body.text)
691
  if not success:
692
  raise HTTPException(status_code=404, detail="Session not found or inactive")
693
+ return {"status": "submitted", "session_id": body.session_id}
694
 
695
 
696
  @router.post("/approve")
 
962
 
963
  @router.post("/truncate/{session_id}")
964
  async def truncate_session(
965
+ session_id: str,
966
+ request: Request,
967
+ user: dict = Depends(get_current_user),
968
  ) -> dict:
969
  """Truncate conversation to before a specific user message."""
970
+ # Check session ownership before parsing the request body so a 404 on a
971
+ # non-existent / non-owned session_id beats the 422 schema-validation error
972
+ # (otherwise the response leaks the required field name to non-owners).
973
  await _check_session_access(session_id, user)
974
+ try:
975
+ body = TruncateRequest(**(await request.json()))
976
+ except ValidationError as exc:
977
+ # Re-raise as RequestValidationError so FastAPI returns its standard
978
+ # structured 422 schema (`{"detail": [{"type":..., "loc":..., ...}]}`)
979
+ # instead of a string-stringified Pydantic dump.
980
+ raise RequestValidationError(exc.errors()) from exc
981
+ except (json.JSONDecodeError, TypeError) as exc:
982
+ raise HTTPException(status_code=422, detail=str(exc))
983
  success = await session_manager.truncate(session_id, body.user_message_index)
984
  if not success:
985
  raise HTTPException(