# Source: analytics-engine / app/routers/ai_query.py
# Author: Peter Mutwiri
# Commit: 9efb816 ("fixed import issues")
# app/routers/ai_query.py
# Third-party framework imports.
from fastapi import APIRouter, Depends, HTTPException, Query
# Project-local services: vector search and local LLM inference.
from app.service.vector_service import VectorService
from app.service.llm_service import LocalLLMService # Your existing LLM file
# Shared dependency that validates the caller's API key.
from app.deps import verify_api_key
# All routes in this module are mounted under /api/v1/ai.
router = APIRouter(prefix="/api/v1/ai", tags=["ai"])
@router.post("/query")
async def ai_query(
    query: str,
    org_id: str = Query(..., description="Organization ID"),
    api_key: str = Depends(verify_api_key),
):
    """RAG endpoint: Question → Vector Search → LLM → Answer.

    Flow:
        1. Semantic-search the org's vector store for relevant transactions.
        2. Build a grounded prompt from the retrieved context.
        3. Ask the local LLM to answer using ONLY that context.

    Args:
        query: The user's natural-language question.
        org_id: Organization whose vector store is searched.
        api_key: Injected by ``verify_api_key``; request is rejected upstream
            if invalid.

    Returns:
        dict with ``answer`` (LLM text), ``sources`` (retrieved context),
        and the echoed ``query``. When no context is found, ``answer`` is a
        fixed fallback message and ``sources`` is empty.

    Raises:
        HTTPException: 500 wrapping any unexpected failure in search or
            generation.
    """
    try:
        # 1. Search vector DB for relevant context (top 5 matches).
        vector_service = VectorService(org_id)
        context = vector_service.semantic_search(query, top_k=5)
        if not context:
            # No retrieval hits: answer honestly rather than hallucinate.
            return {
                "answer": "I don't have enough recent data to answer that. Try asking about sales, inventory, or customer patterns.",
                "sources": []
            }
        # 2. Build RAG prompt with context.
        # NOTE(review): assumes each hit is a dict with 'text' and 'metadata'
        # keys — confirm against VectorService.semantic_search's return shape.
        context_str = "\n\n".join([
            f"Transaction: {c['text']} (Metadata: {c['metadata']})"
            for c in context
        ])
        prompt = f"""You are a retail analytics AI. Answer the user's question using ONLY the transaction data below.
**User Question:** {query}
**Relevant Transactions (Last 7 Days):**
{context_str}
**Instructions:**
- If the data doesn't support the question, say so
- Provide specific numbers and dates when available
- Cite transaction IDs if present
- Keep answer under 200 words
- Format with markdown for clarity
"""
        # 3. Call the existing local LLM service.
        llm_service = LocalLLMService()
        answer = await llm_service.generate(prompt)
        return {
            "answer": answer,
            "sources": context,
            "query": query
        }
    except HTTPException:
        # Don't re-wrap deliberate HTTP errors into a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"AI Query failed: {str(e)}")
# Liveness probe for the AI subsystem.
@router.get("/health")
async def ai_health():
    """Report readiness and the embedding model backing vector search."""
    status_payload = dict(
        status="ready",
        model="sentence-transformers/all-MiniLM-L6-v2",
    )
    return status_payload