# acra-api / app.py
# Nottybro — feat: add llm_endpoint, llm_api_key, llm_model to QueryRequest
# b7470ec (verified)
import os
from contextlib import asynccontextmanager
from typing import Optional, List

from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from auth import verify_api_key
from credits import check_and_deduct_credits
from acra import run_acra_pipeline
from classifier_inference import warm_up
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan hook that warms up the classifier before yielding.

    The code before the ``yield`` prints a start notice, calls
    ``warm_up()`` and prints a ready notice. Nothing follows the
    ``yield``, so no work is done on the way out.

    Args:
        app: The FastAPI application the hook is attached to (not used
            inside the body).
    """
    startup_notice = "Loading ACRA classifier..."
    ready_notice = "Ready \u2713"

    print(startup_notice)
    # Called without ``await``: it returns before the ready notice prints.
    warm_up()
    print(ready_notice)
    yield
# Application object; the lifespan hook is attached here so that it runs
# as part of the app's startup.
app = FastAPI(
    lifespan=lifespan,
    title="ACRA API",
    version="1.0.0",
    description="Adaptive Contextual Retrieval Architecture \u2014 NurricAI",
)

# CORS policy: every origin, HTTP method and request header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
class IngestRequest(BaseModel):
    """Request body for ``POST /v1/ingest``."""

    # Texts to ingest; the ingest handler bills based on len(texts).
    texts: List[str]
    # Optional per-text metadata dicts. When omitted (or empty) the ingest
    # handler substitutes one empty dict per text.
    metadata: Optional[List[dict]] = None
    # Namespace forwarded to the pipeline; defaults to "default".
    namespace: Optional[str] = "default"
class QueryRequest(BaseModel):
    """Request body for ``POST /v1/query``."""

    # Query string handed to the pipeline.
    query: str
    # Namespace forwarded to the pipeline; defaults to "default".
    namespace: Optional[str] = "default"
    # Forwarded to the pipeline as ``top_k``.
    # Presumably the number of results to retrieve — confirm against acra.
    top_k: Optional[int] = 5
    # Forwarded to the pipeline as ``rerank``.
    rerank: Optional[bool] = True
    # Forwarded to the pipeline as ``use_web``.
    use_web: Optional[bool] = False
    # NOTE(review): the three llm_* fields below are accepted in the body,
    # but the /v1/query handler in this file does not pass them on to
    # run_acra_pipeline — confirm whether they are meant to be forwarded.
    llm_endpoint: Optional[str] = None
    llm_api_key: Optional[str] = None
    llm_model: Optional[str] = None
class QueryResponse(BaseModel):
    """Response body returned by ``POST /v1/query``."""

    # Taken from the pipeline result's "answer" key.
    answer: str
    # Taken from the pipeline result's "sources" key.
    sources: List[dict]
    # Credits charged for this request.
    credits_used: int
    # Balance returned by the credit deduction call.
    credits_remaining: int
    # Optional extras copied from the pipeline result when present.
    complexity: Optional[dict] = None
    retrieval_source: Optional[str] = None
    # Declared once; the original listed this field twice, where the second
    # declaration merely re-bound the first.
    cost: Optional[dict] = None
@app.get("/")
def root():
    """Landing endpoint: report that the API is live and point at the docs."""
    payload = {"status": "ACRA API is live \U0001F680", "docs": "/docs"}
    return payload
@app.get("/health")
def health():
    """Health probe: always returns a constant ``{"status": "ok"}`` body."""
    payload = {"status": "ok"}
    return payload
@app.post("/v1/ingest")
async def ingest(body: IngestRequest,
                 x_api_key: str = Header(..., alias="X-API-Key")):
    """Ingest a batch of texts for the authenticated caller.

    The caller is resolved from the ``X-API-Key`` header, the request is
    validated, credits are deducted, and the texts are handed to the
    pipeline in ``"ingest"`` mode.

    Billing is ``len(texts) // 10`` credits with a minimum of 1.

    Args:
        body: Texts, optional per-text metadata and target namespace.
        x_api_key: API key read from the ``X-API-Key`` request header.

    Returns:
        A dict with ``status``, ``chunks_indexed`` (the pipeline's return
        value), ``credits_used`` and ``credits_remaining``.

    Raises:
        HTTPException: 422 when ``metadata`` is supplied but does not
            contain exactly one entry per text.
    """
    user = await verify_api_key(x_api_key)

    # Missing or empty metadata falls back to one empty dict per text.
    metadata = body.metadata or [{} for _ in body.texts]

    # Reject mismatched metadata *before* charging: otherwise the caller
    # pays for a request whose texts and metadata cannot be paired up.
    if len(metadata) != len(body.texts):
        raise HTTPException(
            status_code=422,
            detail=(
                f"metadata has {len(metadata)} entries but texts has "
                f"{len(body.texts)}; provide one metadata dict per text "
                "or omit metadata"
            ),
        )

    cost = max(1, len(body.texts) // 10)
    remaining = await check_and_deduct_credits(user["id"], cost)
    inserted = await run_acra_pipeline(
        mode="ingest",
        texts=body.texts,
        metadata=metadata,
        namespace=body.namespace,
        user_id=user["id"],
    )
    return {
        "status": "success",
        "chunks_indexed": inserted,
        "credits_used": cost,
        "credits_remaining": remaining,
    }
@app.post("/v1/query", response_model=QueryResponse)
async def query(body: QueryRequest,
                x_api_key: str = Header(..., alias="X-API-Key")):
    """Answer a query for the authenticated caller.

    The caller is resolved from the ``X-API-Key`` header, one credit is
    deducted, and the pipeline is run in ``"query"`` mode. The pipeline
    result must contain ``"answer"`` and ``"sources"``; ``"complexity"``,
    ``"retrieval_source"`` and ``"cost"`` are copied when present.

    Args:
        body: Query text plus retrieval options.
        x_api_key: API key read from the ``X-API-Key`` request header.

    Returns:
        A ``QueryResponse`` built from the pipeline result and the
        remaining credit balance.
    """
    charge = 1  # flat price per query

    account = await verify_api_key(x_api_key)
    account_id = account["id"]
    credits_left = await check_and_deduct_credits(account_id, charge)

    # NOTE(review): body.llm_endpoint / llm_api_key / llm_model are not
    # passed to the pipeline here — confirm whether that is intended.
    outcome = await run_acra_pipeline(
        mode="query",
        query=body.query,
        namespace=body.namespace,
        top_k=body.top_k,
        rerank=body.rerank,
        user_id=account_id,
        use_web=body.use_web,
    )

    return QueryResponse(
        answer=outcome["answer"],
        sources=outcome["sources"],
        credits_used=charge,
        credits_remaining=credits_left,
        complexity=outcome.get("complexity"),
        retrieval_source=outcome.get("retrieval_source"),
        cost=outcome.get("cost"),
    )
@app.get("/v1/usage")
async def usage(x_api_key: str = Header(..., alias="X-API-Key")):
    """Report the caller's plan and credit status.

    Args:
        x_api_key: API key read from the ``X-API-Key`` request header.

    Returns:
        A dict with ``plan``, ``credits_remaining`` and ``credits_reset``,
        all read from the record returned by ``verify_api_key``.
    """
    account = await verify_api_key(x_api_key)

    plan = account["plan"]
    credits_left = account["credits_remaining"]
    # Stored as "credits_reset_at" but exposed to clients as "credits_reset".
    reset_at = account["credits_reset_at"]

    return {
        "plan": plan,
        "credits_remaining": credits_left,
        "credits_reset": reset_at,
    }