"""
app.py
======
FastAPI entrypoint.
- Loads .env for local development
- CORS configured for portfolio domain only
- Rate limiting via slowapi (10 req/min per IP)
- X-Portfolio-Key header check on /chat (set server-side by Vercel proxy)
- FAISS index built at startup
- Session cleanup background task started at startup
"""
import asyncio
import logging
import uuid
from contextlib import asynccontextmanager
from dotenv import load_dotenv
load_dotenv(override=True) # no-op on HuggingFace (env vars injected by HF Secrets)
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from slowapi.errors import RateLimitExceeded
from slowapi.middleware import SlowAPIMiddleware
import agent
import session as session_store
from security import ALLOWED_ORIGINS, limiter, verify_portfolio_key
# Configure the root logger at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ── Lifespan ──────────────────────────────────────────────────────────────────
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup work before the app serves requests and tidy up on shutdown.

    Startup:
        - calls ``agent.build_index()`` to build the knowledge index
        - schedules ``session_store.cleanup_loop()`` as a background task

    Shutdown:
        - cancels the background task and waits for it to actually finish,
          so it is not left pending when the event loop closes
    """
    logger.info("Building FAISS knowledge index...")
    agent.build_index()
    logger.info("Starting session cleanup task...")
    cleanup_task = asyncio.create_task(session_store.cleanup_loop())
    try:
        yield
    finally:
        # `finally` guarantees the task is stopped even if an exception is
        # thrown into the generator at the yield point.
        cleanup_task.cancel()
        try:
            # cancel() only *requests* cancellation; awaiting lets the task
            # unwind before the loop is torn down.
            await cleanup_task
        except asyncio.CancelledError:
            # Expected outcome of the cancel() above.
            pass
        except Exception:
            # The loop had already crashed on its own; surface that in the
            # logs instead of failing shutdown.
            logger.exception("Session cleanup task failed")
# ── App ───────────────────────────────────────────────────────────────────────
# Application object. `docs_url` and `redoc_url` are set to None; in FastAPI
# that turns off the interactive documentation routes.
app = FastAPI(
    title = "Sulitha's Portfolio Agent",
    description = "AI agent that represents Sulitha Nulaksha Bandara.",
    version = "1.0.0",
    lifespan = lifespan,
    docs_url = None,
    redoc_url = None,
)
# slowapi looks up the limiter on `app.state`.
app.state.limiter = limiter
# NOTE(review): middleware registration order determines wrapping order;
# keep these two calls in this order unless the effect has been verified.
app.add_middleware(SlowAPIMiddleware)
app.add_middleware(
    CORSMiddleware,
    allow_origins = ALLOWED_ORIGINS,  # origin allow-list imported from security
    allow_credentials = False,
    allow_methods = ["POST", "GET"],
    allow_headers = ["Content-Type", "X-Portfolio-Key"],
)
# ── Error handlers ────────────────────────────────────────────────────────────
@app.exception_handler(RateLimitExceeded)
async def rate_limit_handler(request: Request, exc: RateLimitExceeded):
    """Turn a ``RateLimitExceeded`` error into a JSON 429 response.

    Args:
        request: The request that hit the limit (unused; required by the
            exception-handler signature).
        exc: The raised ``RateLimitExceeded`` (unused).

    Returns:
        A ``JSONResponse`` with status 429 and a ``detail`` message.
    """
    return JSONResponse(
        status_code=429,
        # \u2014 is an em dash. It is written as an escape so the user-facing
        # message cannot be corrupted by a file-encoding mix-up.
        content={"detail": "Too many messages \u2014 please wait a moment and try again."},
    )
# ── Schemas ───────────────────────────────────────────────────────────────────
class ChatRequest(BaseModel):
    """Request body accepted by POST /chat."""
    # Raw user message; the /chat handler strips it and enforces length limits.
    message: str
    session_id: str | None = None # None on first message; frontend stores and resends
class ChatResponse(BaseModel):
    """Response body returned by POST /chat."""
    # Text returned by agent.chat for this turn.
    reply: str
    # Session id used for this turn (client-supplied or newly generated).
    session_id: str
# ── Routes ────────────────────────────────────────────────────────────────────
@app.get("/health")
async def health():
    """Liveness probe: always answers with a fixed "ok" status payload."""
    payload = {"status": "ok"}
    return payload
@app.post("/chat", response_model=ChatResponse)
@limiter.limit("10/minute")
async def chat_endpoint(request: Request, body: ChatRequest):
    """
    Handle one chat turn and return the agent's reply.

    Checks applied to a request:
    - X-Portfolio-Key header, via verify_portfolio_key(request)
    - Rate limit of 10 requests/minute, declared by the limiter.limit decorator
    - Message must be non-empty after stripping and at most 1000 characters
      (otherwise HTTP 400)
    - Session message cap, which per the original notes is checked inside
      agent.chat

    NOTE(review): the limiter decorator wraps this handler, so the rate limit
    is presumably evaluated before the header check in the body runs - confirm
    against slowapi if the ordering matters.

    When the request carries no session id (or an empty one) a fresh UUID4 is
    generated; the id that was used is echoed back in the response.
    """
    verify_portfolio_key(request)

    text = body.message.strip()
    sid = body.session_id if body.session_id else str(uuid.uuid4())

    if text == "":
        raise HTTPException(status_code=400, detail="Message cannot be empty.")
    if len(text) > 1000:
        raise HTTPException(status_code=400, detail="Message too long (max 1000 chars).")

    answer = await agent.chat(text, sid)
    return ChatResponse(reply=answer, session_id=sid)
|