Spaces:

Sulitha
/

sulitha-agent

Paused

App Files Files Community

sulitha-agent / app.py

Sulitha

initial deployment

7df55e6 about 2 months ago

raw

history blame contribute delete

4.3 kB

	"""
	app.py
	======
	FastAPI entrypoint.
	- Loads .env for local development
	- CORS configured for portfolio domain only
	- Rate limiting via slowapi (10 req/min per IP)
	- X-Portfolio-Key header check on /chat (set server-side by Vercel proxy)
	- FAISS index built at startup
	- Session cleanup background task started at startup
	"""

	import asyncio
	import logging
	import uuid
	from contextlib import asynccontextmanager

	from dotenv import load_dotenv
	load_dotenv(override=True) # no-op on HuggingFace (env vars injected by HF Secrets)

	from fastapi import FastAPI, Request, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse
	from pydantic import BaseModel
	from slowapi.errors import RateLimitExceeded
	from slowapi.middleware import SlowAPIMiddleware

	import agent
	import session as session_store
	from security import ALLOWED_ORIGINS, limiter, verify_portfolio_key

	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)


	# ── Lifespan ──────────────────────────────────────────────────────────────────

	@asynccontextmanager
	async def lifespan(app: FastAPI):
	logger.info("Building FAISS knowledge index...")
	agent.build_index()
	logger.info("Starting session cleanup task...")
	cleanup_task = asyncio.create_task(session_store.cleanup_loop())
	yield
	cleanup_task.cancel()


	# ── App ───────────────────────────────────────────────────────────────────────

	app = FastAPI(
	title = "Sulitha's Portfolio Agent",
	description = "AI agent that represents Sulitha Nulaksha Bandara.",
	version = "1.0.0",
	lifespan = lifespan,
	docs_url = None,
	redoc_url = None,
	)

	app.state.limiter = limiter
	app.add_middleware(SlowAPIMiddleware)

	app.add_middleware(
	CORSMiddleware,
	allow_origins = ALLOWED_ORIGINS,
	allow_credentials = False,
	allow_methods = ["POST", "GET"],
	allow_headers = ["Content-Type", "X-Portfolio-Key"],
	)


	# ── Error handlers ────────────────────────────────────────────────────────────

	@app.exception_handler(RateLimitExceeded)
	async def rate_limit_handler(request: Request, exc: RateLimitExceeded):
	return JSONResponse(
	status_code = 429,
	content = {"detail": "Too many messages — please wait a moment and try again."},
	)


	# ── Schemas ───────────────────────────────────────────────────────────────────

	class ChatRequest(BaseModel):
	message: str
	session_id: str \| None = None # None on first message; frontend stores and resends


	class ChatResponse(BaseModel):
	reply: str
	session_id: str


	# ── Routes ────────────────────────────────────────────────────────────────────

	@app.get("/health")
	async def health():
	return {"status": "ok"}


	@app.post("/chat", response_model=ChatResponse)
	@limiter.limit("10/minute")
	async def chat_endpoint(request: Request, body: ChatRequest):
	"""
	Security order:
	1. X-Portfolio-Key header — rejects anything not coming from the Vercel proxy
	2. Rate limit — 10 requests/min per IP
	3. Session message cap — checked inside agent.chat
	"""
	verify_portfolio_key(request)

	session_id = body.session_id or str(uuid.uuid4())

	message = body.message.strip()
	if not message:
	raise HTTPException(status_code=400, detail="Message cannot be empty.")
	if len(message) > 1000:
	raise HTTPException(status_code=400, detail="Message too long (max 1000 chars).")

	reply = await agent.chat(message, session_id)

	return ChatResponse(reply=reply, session_id=session_id)