"""ACRA API: FastAPI service exposing ingest, query and usage endpoints."""

import os
from contextlib import asynccontextmanager
from typing import List, Optional

from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from auth import verify_api_key
from credits import check_and_deduct_credits
from acra import run_acra_pipeline
from classifier_inference import warm_up

# Billing constants: an ingest call costs one credit per 10 texts (floor
# division, minimum 1); a query call costs a flat 1 credit.
_INGEST_TEXTS_PER_CREDIT = 10
_QUERY_CREDIT_COST = 1


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run ``warm_up()`` once before the application starts serving.

    Registered through ``FastAPI(lifespan=...)``; there is no teardown work
    after the ``yield``.
    """
    print("Loading ACRA classifier...")
    warm_up()
    print("Ready \u2713")
    yield


app = FastAPI(
    title="ACRA API",
    description="Adaptive Contextual Retrieval Architecture \u2014 NurricAI",
    version="1.0.0",
    lifespan=lifespan,
)

# NOTE(review): every origin, method and header is allowed. Confirm this is
# intended for a key-authenticated public API before tightening.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


class IngestRequest(BaseModel):
    """Request body for ``POST /v1/ingest``."""

    # Documents to index; must be non-empty (enforced by the handler).
    texts: List[str]
    # Optional per-text metadata; when non-empty it must have one entry per
    # text. Omitted or empty means "one empty dict per text".
    metadata: Optional[List[dict]] = None
    namespace: Optional[str] = "default"


class QueryRequest(BaseModel):
    """Request body for ``POST /v1/query``."""

    query: str
    namespace: Optional[str] = "default"
    top_k: Optional[int] = 5
    rerank: Optional[bool] = True
    use_web: Optional[bool] = False
    # NOTE(review): the three llm_* fields are accepted here but the
    # /v1/query handler does not forward them to the pipeline -- confirm
    # whether that is intentional.
    llm_endpoint: Optional[str] = None
    llm_api_key: Optional[str] = None
    llm_model: Optional[str] = None


class QueryResponse(BaseModel):
    """Response body for ``POST /v1/query``."""

    answer: str
    sources: List[dict]
    credits_used: int
    credits_remaining: int
    complexity: Optional[dict] = None
    retrieval_source: Optional[str] = None
    # Declared once; the original listed this field twice.
    cost: Optional[dict] = None


@app.get("/")
def root():
    """Return a liveness banner and the path of the interactive docs."""
    return {"status": "ACRA API is live \U0001F680", "docs": "/docs"}


@app.get("/health")
def health():
    """Return a static health-check payload."""
    return {"status": "ok"}


@app.post("/v1/ingest")
async def ingest(
    body: IngestRequest,
    x_api_key: str = Header(..., alias="X-API-Key"),
):
    """Index a batch of texts for the caller and charge credits for it.

    Args:
        body: Texts to index, optional per-text metadata, and the namespace.
        x_api_key: Value of the ``X-API-Key`` request header.

    Returns:
        A dict with ``status``, ``chunks_indexed`` (the value returned by
        ``run_acra_pipeline``), ``credits_used`` and ``credits_remaining``
        (the value returned by ``check_and_deduct_credits``).

    Raises:
        HTTPException: 422 when ``texts`` is empty or when a non-empty
            ``metadata`` list does not have one entry per text.
    """
    user = await verify_api_key(x_api_key)

    # Validate before charging so a malformed request never costs credits.
    if not body.texts:
        raise HTTPException(
            status_code=422,
            detail="texts must contain at least one item",
        )
    if body.metadata and len(body.metadata) != len(body.texts):
        raise HTTPException(
            status_code=422,
            detail="metadata must have exactly one entry per text",
        )

    cost = max(1, len(body.texts) // _INGEST_TEXTS_PER_CREDIT)
    # NOTE(review): credits are deducted before the pipeline runs and no
    # refund path is visible here if the pipeline raises.
    remaining = await check_and_deduct_credits(user["id"], cost)

    # An omitted or empty metadata list falls back to one empty dict per text.
    metadata = body.metadata or [{} for _ in body.texts]
    inserted = await run_acra_pipeline(
        mode="ingest",
        texts=body.texts,
        metadata=metadata,
        namespace=body.namespace,
        user_id=user["id"],
    )
    return {
        "status": "success",
        "chunks_indexed": inserted,
        "credits_used": cost,
        "credits_remaining": remaining,
    }


@app.post("/v1/query", response_model=QueryResponse)
async def query(
    body: QueryRequest,
    x_api_key: str = Header(..., alias="X-API-Key"),
):
    """Run a query through the pipeline and charge a flat credit cost.

    Args:
        body: Query text and retrieval options.
        x_api_key: Value of the ``X-API-Key`` request header.

    Returns:
        A ``QueryResponse`` built from the pipeline result: ``answer`` and
        ``sources`` are required keys of that result, while ``complexity``,
        ``retrieval_source`` and ``cost`` are optional and default to None.
    """
    user = await verify_api_key(x_api_key)
    # NOTE(review): as in ingest, the credit is deducted before the pipeline
    # runs.
    remaining = await check_and_deduct_credits(user["id"], _QUERY_CREDIT_COST)

    result = await run_acra_pipeline(
        mode="query",
        query=body.query,
        namespace=body.namespace,
        top_k=body.top_k,
        rerank=body.rerank,
        user_id=user["id"],
        use_web=body.use_web,
    )
    return QueryResponse(
        answer=result["answer"],
        sources=result["sources"],
        credits_used=_QUERY_CREDIT_COST,
        credits_remaining=remaining,
        complexity=result.get("complexity"),
        retrieval_source=result.get("retrieval_source"),
        cost=result.get("cost"),
    )


@app.get("/v1/usage")
async def usage(x_api_key: str = Header(..., alias="X-API-Key")):
    """Report the caller's plan and credit balance.

    Reads ``plan``, ``credits_remaining`` and ``credits_reset_at`` from the
    user record returned by ``verify_api_key``; no credits are charged.
    """
    user = await verify_api_key(x_api_key)
    return {
        "plan": user["plan"],
        "credits_remaining": user["credits_remaining"],
        "credits_reset": user["credits_reset_at"],
    }