| |
| from fastapi import APIRouter, Depends, HTTPException, Query |
| from app.service.vector_service import VectorService |
| from app.service.llm_service import LocalLLMService |
| from app.deps import verify_api_key |
|
|
# Sub-router for AI/RAG endpoints; mounted at /api/v1/ai and grouped
# under the "ai" tag in the OpenAPI docs.
router = APIRouter(prefix="/api/v1/ai", tags=["ai"])
|
|
@router.post("/query")
async def ai_query(
    query: str,
    org_id: str = Query(..., description="Organization ID"),
    api_key: str = Depends(verify_api_key),
):
    """RAG endpoint: Question -> Vector Search -> LLM -> Answer.

    Args:
        query: Natural-language question from the user.
        org_id: Organization whose vector index is searched.
        api_key: Resolved by the ``verify_api_key`` dependency; a failed
            key check raises inside the dependency before this body runs.

    Returns:
        dict with ``answer`` (LLM text), ``sources`` (retrieved context
        entries, empty when nothing matched), and the original ``query``
        (omitted in the no-context fallback response).

    Raises:
        HTTPException: 500 when retrieval or generation fails; any
            HTTPException raised deliberately inside is re-raised as-is.
    """
    try:
        # Retrieve the top-5 most relevant transaction chunks for this org.
        vector_service = VectorService(org_id)
        context = vector_service.semantic_search(query, top_k=5)

        # No matching context: answer honestly instead of hallucinating.
        if not context:
            return {
                "answer": "I don't have enough recent data to answer that. Try asking about sales, inventory, or customer patterns.",
                "sources": []
            }

        # Flatten retrieved chunks into a single prompt-friendly string.
        context_str = "\n\n".join([
            f"Transaction: {c['text']} (Metadata: {c['metadata']})"
            for c in context
        ])

        prompt = f"""You are a retail analytics AI. Answer the user's question using ONLY the transaction data below.

**User Question:** {query}

**Relevant Transactions (Last 7 Days):**
{context_str}

**Instructions:**
- If the data doesn't support the question, say so
- Provide specific numbers and dates when available
- Cite transaction IDs if present
- Keep answer under 200 words
- Format with markdown for clarity
"""

        llm_service = LocalLLMService()
        answer = await llm_service.generate(prompt)

        return {
            "answer": answer,
            "sources": context,
            "query": query
        }

    except HTTPException:
        # Don't mask deliberate HTTP errors (e.g. 4xx raised downstream)
        # as generic 500s.
        raise
    except Exception as e:
        # Chain the cause so the original traceback survives in logs.
        raise HTTPException(status_code=500, detail=f"AI Query failed: {str(e)}") from e
|
|
| |
@router.get("/health")
async def ai_health():
    """Report service readiness and the embedding model identifier."""
    payload = {
        "status": "ready",
        "model": "sentence-transformers/all-MiniLM-L6-v2",
    }
    return payload