sulitha-agent / app.py
Sulitha's picture
initial deployment
7df55e6
"""
app.py
======
FastAPI entrypoint.
- Loads .env for local development
- CORS configured for portfolio domain only
- Rate limiting via slowapi (10 req/min per IP)
- X-Portfolio-Key header check on /chat (set server-side by Vercel proxy)
- FAISS index built at startup
- Session cleanup background task started at startup
"""
import asyncio
import logging
import uuid
from contextlib import asynccontextmanager
from dotenv import load_dotenv
load_dotenv(override=True) # no-op on HuggingFace (env vars injected by HF Secrets)
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from slowapi.errors import RateLimitExceeded
from slowapi.middleware import SlowAPIMiddleware
import agent
import session as session_store
from security import ALLOWED_ORIGINS, limiter, verify_portfolio_key
# Configure the root logger at INFO level for the whole process.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# ── Lifespan ──────────────────────────────────────────────────────────────────
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup and shutdown work around the application's lifetime.

    Startup:
        * calls ``agent.build_index()`` (logged as building the FAISS index)
        * schedules ``session_store.cleanup_loop()`` as a background task

    Shutdown:
        * cancels the background task and waits for it to finish unwinding,
          so it is not left pending when the event loop closes

    Args:
        app: The FastAPI application. Not used here.
    """
    logger.info("Building FAISS knowledge index...")
    agent.build_index()
    logger.info("Starting session cleanup task...")
    cleanup_task = asyncio.create_task(session_store.cleanup_loop())
    try:
        yield
    finally:
        # Runs on normal shutdown and also when an exception is thrown into
        # the generator at the yield point.
        cleanup_task.cancel()
        try:
            # cancel() only *requests* cancellation; awaiting lets the task
            # actually finish before the loop is torn down.
            await cleanup_task
        except asyncio.CancelledError:
            pass
        except Exception:
            # The loop may have crashed earlier; report it instead of
            # failing shutdown.
            logger.exception("Session cleanup task ended with an error")
# ── App ───────────────────────────────────────────────────────────────────────
# Application object. docs_url/redoc_url are None, which turns off the
# interactive Swagger UI and ReDoc pages.
app = FastAPI(
    title="Sulitha's Portfolio Agent",
    description="AI agent that represents Sulitha Nulaksha Bandara.",
    version="1.0.0",
    lifespan=lifespan,
    docs_url=None,
    redoc_url=None,
)

# Expose the shared limiter on app.state and install the slowapi middleware.
app.state.limiter = limiter
app.add_middleware(SlowAPIMiddleware)

# CORS: only the origins listed in ALLOWED_ORIGINS, no credentials, and only
# the methods/headers this API uses. Registration order is kept as in the
# original because middleware order affects which layer sees a request first.
app.add_middleware(
    CORSMiddleware,
    allow_origins=ALLOWED_ORIGINS,
    allow_credentials=False,
    allow_methods=["POST", "GET"],
    allow_headers=["Content-Type", "X-Portfolio-Key"],
)
# ── Error handlers ────────────────────────────────────────────────────────────
@app.exception_handler(RateLimitExceeded)
async def rate_limit_handler(request: Request, exc: RateLimitExceeded):
    """Turn a ``RateLimitExceeded`` error into a JSON 429 response.

    Args:
        request: The request that triggered the error (unused).
        exc: The raised ``RateLimitExceeded`` instance (unused).

    Returns:
        A ``JSONResponse`` with status 429 and a ``detail`` message.
    """
    return JSONResponse(
        status_code=429,
        # The dash was previously stored as mis-decoded bytes and shown to
        # users as garbage; it is a plain em dash.
        content={"detail": "Too many messages — please wait a moment and try again."},
    )
# ── Schemas ───────────────────────────────────────────────────────────────────
# Request body accepted by POST /chat.
class ChatRequest(BaseModel):
    # Raw user message; the endpoint strips and length-checks it.
    message: str

    # None on first message; frontend stores and resends
    session_id: str | None = None
# Response body returned by POST /chat.
class ChatResponse(BaseModel):
    # Text produced by agent.chat for this turn.
    reply: str

    # Session identifier used for this turn (echoed or newly generated).
    session_id: str
# ── Routes ────────────────────────────────────────────────────────────────────
@app.get("/health")
async def health():
    # Liveness probe: constant payload, no dependencies touched.
    payload = {"status": "ok"}
    return payload
@app.post("/chat", response_model=ChatResponse)
@limiter.limit("10/minute")
async def chat_endpoint(request: Request, body: ChatRequest):
    """
    Handle one chat turn and return the agent's reply.

    Protections that apply to this route:
    - X-Portfolio-Key header check via verify_portfolio_key(request); the
      module notes say the header is set server-side by the Vercel proxy.
    - Rate limit of 10 requests/minute, attached by the limiter decorator.
      NOTE(review): the decorator wraps this function, so the limit is
      presumably evaluated before the key check in the body; confirm
      against slowapi.
    - Message validation: must be non-empty after stripping and at most
      1000 characters, otherwise HTTP 400.
    - Session message cap: per the original notes this is checked inside
      agent.chat, which is not visible from this module.
    """
    verify_portfolio_key(request)

    text = body.message.strip()
    if text == "":
        raise HTTPException(status_code=400, detail="Message cannot be empty.")
    if len(text) > 1000:
        raise HTTPException(status_code=400, detail="Message too long (max 1000 chars).")

    # Reuse the caller's session when one was supplied; otherwise mint a new id.
    if body.session_id:
        sid = body.session_id
    else:
        sid = str(uuid.uuid4())

    answer = await agent.chat(text, sid)
    return ChatResponse(reply=answer, session_id=sid)