Spaces:
Sleeping
Sleeping
Initial WorkWise backend deployment
Browse files- .env.example +23 -0
- app.py +17 -0
- app/__init__.py +2 -0
- app/config.py +38 -0
- app/main.py +61 -0
- app/models/__init__.py +1 -0
- app/models/jira_schema.py +52 -0
- app/routes/__init__.py +1 -0
- app/routes/ask_routes.py +83 -0
- app/routes/ingest_routes.py +52 -0
- app/routes/metrics_routes copy.py +76 -0
- app/routes/metrics_routes.py.qdrant +80 -0
- app/services/__init__.py +1 -0
- app/services/data_ingestion.py +89 -0
- app/services/embeddings.py +41 -0
- app/services/generator.py +88 -0
- app/services/retriever.py +66 -0
- app/services/vector_store.py +141 -0
- app/services/vector_store.py.qdrant +96 -0
- app/utils/__init__.py +1 -0
- app/utils/logger.py +19 -0
- app/utils/response_builder.py +35 -0
- app/utils/vector_store.py +96 -0
- requirements.txt +11 -0
.env.example
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Faiss (local index) Configuration
|
| 2 |
+
FAISS_INDEX_PATH=data/faiss.index
|
| 3 |
+
FAISS_PAYLOADS_PATH=data/faiss_payloads.json
|
| 4 |
+
|
| 5 |
+
# Qdrant Configuration
|
| 6 |
+
QDRANT_URL=http://localhost:6333
|
| 7 |
+
QDRANT_API_KEY=
|
| 8 |
+
QDRANT_COLLECTION_NAME=jira_tickets
|
| 9 |
+
|
| 10 |
+
# Hugging Face Configuration
|
| 11 |
+
HF_API_URL=https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.1
|
| 12 |
+
HF_TOKEN=your_huggingface_token_here
|
| 13 |
+
|
| 14 |
+
# Embedding Model
|
| 15 |
+
EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
|
| 16 |
+
|
| 17 |
+
# Server Configuration
|
| 18 |
+
HOST=0.0.0.0
|
| 19 |
+
PORT=8000
|
| 20 |
+
LOG_LEVEL=info
|
| 21 |
+
|
| 22 |
+
# CORS
|
| 23 |
+
ALLOWED_ORIGINS=http://localhost:5173,https://your-frontend.pages.dev
|
app.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Hugging Face Spaces entry point.

Exposes a top-level `app` (FastAPI) and mounts the real application
(app.main) under /api.
"""
import gradio as gr
from fastapi import FastAPI
from app.main import app as fastapi_app  # your existing FastAPI app

# NOTE(review): `gradio_app` is created but never used or mounted anywhere
# in this module — presumably only so the Space's sdk detection sees a
# gradio object. Confirm before removing.
gradio_app = gr.Blocks()
gradio_app.launch = lambda *args, **kwargs: None  # Dummy launch; not used

# Hugging Face expects a variable called `app`
app = FastAPI()

@app.get("/")
def root():
    # Lightweight liveness probe for the Space front page.
    return {"message": "WorkWise Backend (Faiss + FastAPI) on ZeroGPU"}

# Mount your existing FastAPI app
app.mount("/api", fastapi_app)
|
app/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""WorkWise Backend Application"""
|
| 2 |
+
__version__ = "1.0.0"
|
app/config.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Configuration management for WorkWise backend"""
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
load_dotenv()
|
| 6 |
+
|
| 7 |
+
class Settings:
    """Application settings loaded from environment variables.

    All values are read once at import time via os.getenv; defaults
    mirror .env.example.
    """

    # Faiss (local) configuration
    FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "data/faiss.index")
    FAISS_PAYLOADS_PATH: str = os.getenv("FAISS_PAYLOADS_PATH", "data/faiss_payloads.json")

    # Qdrant Configuration
    QDRANT_URL: str = os.getenv("QDRANT_URL", "http://localhost:6333")
    QDRANT_API_KEY: str = os.getenv("QDRANT_API_KEY", "")
    QDRANT_COLLECTION_NAME: str = os.getenv("QDRANT_COLLECTION_NAME", "jira_tickets")

    # Hugging Face Configuration
    HF_API_URL: str = os.getenv("HF_API_URL", "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.1")
    HF_TOKEN: str = os.getenv("HF_TOKEN", "")

    # Embedding Model
    EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")

    # Server Configuration
    HOST: str = os.getenv("HOST", "0.0.0.0")
    PORT: int = int(os.getenv("PORT", "8000"))
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "info")

    # CORS — fix: strip whitespace and drop empty entries so values like
    # "http://a.com, http://b.com" or a trailing comma parse correctly
    # (CORS origin matching is exact-string, so " http://b.com" would
    # never match).
    ALLOWED_ORIGINS: list = [
        origin.strip()
        for origin in os.getenv("ALLOWED_ORIGINS", "http://localhost:5173").split(",")
        if origin.strip()
    ]

    # Vector Search
    TOP_K: int = 5
    SCORE_THRESHOLD: float = 0.5

settings = Settings()
|
app/main.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Main FastAPI application entry point"""
|
| 2 |
+
from fastapi import FastAPI
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
from app.config import settings
|
| 5 |
+
from app.routes import ingest_routes, ask_routes, metrics_routes
|
| 6 |
+
from app.utils.logger import setup_logger
|
| 7 |
+
|
| 8 |
+
logger = setup_logger(__name__)
|
| 9 |
+
|
| 10 |
+
app = FastAPI(
|
| 11 |
+
title="WorkWise API",
|
| 12 |
+
description="RAG-powered Jira analytics application",
|
| 13 |
+
version="1.0.0"
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
# CORS Middleware
|
| 17 |
+
app.add_middleware(
|
| 18 |
+
CORSMiddleware,
|
| 19 |
+
allow_origins=settings.ALLOWED_ORIGINS,
|
| 20 |
+
allow_credentials=True,
|
| 21 |
+
allow_methods=["*"],
|
| 22 |
+
allow_headers=["*"],
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
# Include routers
|
| 26 |
+
app.include_router(ingest_routes.router, prefix="/api", tags=["Ingestion"])
|
| 27 |
+
app.include_router(ask_routes.router, prefix="/api", tags=["Query"])
|
| 28 |
+
app.include_router(metrics_routes.router, prefix="/api", tags=["Metrics"])
|
| 29 |
+
|
| 30 |
+
@app.get("/")
async def root():
    """Health check endpoint.

    Returns a static service-identity payload; no dependencies are
    touched, so this endpoint succeeds even if the vector store is
    unavailable.
    """
    return {
        "status": "online",
        "service": "WorkWise API",
        "version": "1.0.0"
    }
|
| 38 |
+
|
| 39 |
+
@app.get("/health")
async def health_check():
    """Detailed health check reporting Faiss index state.

    Returns index/payload paths from settings and the current vector
    count from the vector store.
    """
    # Fix: `vector_store` was referenced here without ever being imported
    # in this module, so every /health request raised NameError. Import
    # locally to avoid module-level import cost/cycles at startup.
    from app.services.vector_store import vector_store

    info = vector_store.get_collection_info()
    return {
        "status": "healthy",
        "index_path": settings.FAISS_INDEX_PATH,
        "payloads_path": settings.FAISS_PAYLOADS_PATH,
        "vectors_count": info.get("vectors_count", 0)
        #"qdrant_url": settings.QDRANT_URL,
        #"collection": settings.QDRANT_COLLECTION_NAME
    }
|
| 52 |
+
|
| 53 |
+
if __name__ == "__main__":
|
| 54 |
+
import uvicorn
|
| 55 |
+
uvicorn.run(
|
| 56 |
+
"app.main:app",
|
| 57 |
+
host=settings.HOST,
|
| 58 |
+
port=settings.PORT,
|
| 59 |
+
reload=True,
|
| 60 |
+
log_level=settings.LOG_LEVEL
|
| 61 |
+
)
|
app/models/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Data models for WorkWise"""
|
app/models/jira_schema.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pydantic models for Jira ticket data"""
|
| 2 |
+
from pydantic import BaseModel, Field
|
| 3 |
+
from typing import Optional, List
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
class JiraTicket(BaseModel):
    """Jira ticket schema"""
    # NOTE(review): dates are plain strings, not datetimes — parsing is
    # deferred to consumers (metrics routes call pd.to_datetime). Confirm
    # this is intentional before tightening the types.
    ticket_id: str = Field(..., description="Unique ticket identifier")
    summary: str = Field(..., description="Ticket summary")
    description: Optional[str] = Field(None, description="Detailed description")
    status: str = Field(..., description="Current status")
    priority: str = Field(..., description="Priority level")
    assignee: Optional[str] = Field(None, description="Assigned team member")
    reporter: str = Field(..., description="Ticket creator")
    project: str = Field(..., description="Project name")
    created_date: str = Field(..., description="Creation timestamp")
    resolved_date: Optional[str] = Field(None, description="Resolution timestamp")
    issue_type: str = Field(..., description="Type of issue")
    labels: Optional[str] = Field(None, description="Comma-separated labels")

class IngestRequest(BaseModel):
    """Request model for data ingestion"""
    # Server-side path to a CSV/JSON export; consumed by /api/ingest.
    file_path: str = Field(..., description="Path to Jira data file")

class IngestResponse(BaseModel):
    """Response model for data ingestion"""
    status: str
    records_indexed: int
    message: Optional[str] = None

class QueryRequest(BaseModel):
    """Request model for RAG queries"""
    query: str = Field(..., description="Natural language question")

class ChartData(BaseModel):
    """Chart data structure"""
    type: str = Field(..., description="Chart type: bar, line, pie")
    # Each point is a {"label": ..., "value": ...} dict (see ask_routes).
    data: List[dict] = Field(..., description="Chart data points")

class QueryResponse(BaseModel):
    """Response model for RAG queries"""
    answer: str
    chart: Optional[ChartData] = None
    # Ticket IDs of the top retrieved documents backing the answer.
    sources: Optional[List[str]] = None

class MetricsResponse(BaseModel):
    """Response model for metrics endpoint"""
    # Human-readable strings (e.g. "3.2 days", "80%") — formatted by the
    # metrics route, not raw numbers.
    avg_resolution_time: str
    open_tickets: int
    closed_tickets: int
    sla_compliance: str
    total_tickets: int
|
app/routes/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""API route modules"""
|
app/routes/ask_routes.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Routes for RAG queries"""
|
| 2 |
+
from fastapi import APIRouter, HTTPException
|
| 3 |
+
from app.models.jira_schema import QueryRequest, QueryResponse
|
| 4 |
+
from app.services.retriever import retriever
|
| 5 |
+
from app.services.generator import generator
|
| 6 |
+
from app.utils.response_builder import build_query_response, extract_chart_intent
|
| 7 |
+
from app.utils.logger import setup_logger
|
| 8 |
+
from collections import Counter
|
| 9 |
+
|
| 10 |
+
logger = setup_logger(__name__)
|
| 11 |
+
router = APIRouter()
|
| 12 |
+
|
| 13 |
+
@router.post("/ask", response_model=QueryResponse)
async def ask_question(request: QueryRequest):
    """
    Answer natural language questions using RAG

    - Retrieves relevant Jira tickets
    - Generates answer using LLM
    - Optionally includes visualizations
    """
    try:
        logger.info(f"Processing query: {request.query}")

        # Retrieve relevant documents
        results = retriever.retrieve(request.query)

        if not results:
            # No hits: answer gracefully rather than returning an error.
            return build_query_response(
                answer="I couldn't find any relevant Jira tickets for your question. Please try rephrasing or check if data has been ingested.",
                sources=[]
            )

        # Format context for the LLM prompt
        context = retriever.format_context(results)

        # Generate answer
        answer = generator.generate_rag_response(request.query, context)

        # Extract source ticket IDs (top 3 hits only)
        sources = [r['payload'].get('ticket_id', 'Unknown') for r in results[:3]]

        # Check if visualization is needed; chart_type is None when the
        # query carries no chart intent.
        chart_type = extract_chart_intent(request.query)
        chart_data = None

        if chart_type:
            chart_data = _generate_chart_data(results, chart_type, request.query)

        return build_query_response(
            answer=answer,
            chart_type=chart_type,
            chart_data=chart_data,
            sources=sources
        )

    except Exception as e:
        # NOTE(review): this broad handler also converts any HTTPException
        # raised above into a 500 — none is raised today, but keep in mind.
        logger.error(f"Query failed: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
| 60 |
+
|
| 61 |
+
def _generate_chart_data(results, chart_type, query):
|
| 62 |
+
"""Generate chart data from retrieved results"""
|
| 63 |
+
payloads = [r['payload'] for r in results]
|
| 64 |
+
|
| 65 |
+
# Status distribution
|
| 66 |
+
if 'status' in query.lower():
|
| 67 |
+
status_counts = Counter(p.get('status', 'Unknown') for p in payloads)
|
| 68 |
+
return [{"label": k, "value": v} for k, v in status_counts.items()]
|
| 69 |
+
|
| 70 |
+
# Priority distribution
|
| 71 |
+
elif 'priority' in query.lower():
|
| 72 |
+
priority_counts = Counter(p.get('priority', 'Unknown') for p in payloads)
|
| 73 |
+
return [{"label": k, "value": v} for k, v in priority_counts.items()]
|
| 74 |
+
|
| 75 |
+
# Project distribution
|
| 76 |
+
elif 'project' in query.lower():
|
| 77 |
+
project_counts = Counter(p.get('project', 'Unknown') for p in payloads)
|
| 78 |
+
return [{"label": k, "value": v} for k, v in project_counts.items()]
|
| 79 |
+
|
| 80 |
+
# Default: status breakdown
|
| 81 |
+
else:
|
| 82 |
+
status_counts = Counter(p.get('status', 'Unknown') for p in payloads)
|
| 83 |
+
return [{"label": k, "value": v} for k, v in status_counts.items()]
|
app/routes/ingest_routes.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Routes for data ingestion"""
|
| 2 |
+
from fastapi import APIRouter, HTTPException
|
| 3 |
+
from app.models.jira_schema import IngestRequest, IngestResponse
|
| 4 |
+
from app.services.data_ingestion import DataIngestionService
|
| 5 |
+
from app.services.embeddings import embedding_service
|
| 6 |
+
from app.services.vector_store import vector_store
|
| 7 |
+
from app.utils.logger import setup_logger
|
| 8 |
+
|
| 9 |
+
logger = setup_logger(__name__)
|
| 10 |
+
router = APIRouter()
|
| 11 |
+
|
| 12 |
+
@router.post("/ingest", response_model=IngestResponse)
async def ingest_data(request: IngestRequest):
    """
    Ingest Jira data from CSV/JSON file

    - Parses the file
    - Generates embeddings
    - Stores in the configured vector store (Faiss-backed `vector_store`)
    """
    try:
        logger.info(f"Starting ingestion from: {request.file_path}")

        # Load data (auto-detects CSV vs JSON by extension)
        records = DataIngestionService.load_data(request.file_path)

        if not records:
            raise HTTPException(status_code=400, detail="No records found in file")

        # Extract searchable text built by the ingestion service
        texts = [record.get('searchable_text', '') for record in records]

        # Generate embeddings
        embeddings = embedding_service.embed_batch(texts)

        # Create collection (recreates if exists) — ingestion replaces the
        # existing index rather than appending to it.
        vector_store.create_collection(vector_size=embedding_service.get_dimension())

        # Store vectors alongside their record payloads
        count = vector_store.upsert_vectors(embeddings, records)

        logger.info(f"Successfully indexed {count} records")

        return IngestResponse(
            status="success",
            records_indexed=count,
            message=f"Successfully ingested and indexed {count} Jira tickets"
        )

    except Exception as e:
        # NOTE(review): this also re-wraps the 400 raised above into a 500;
        # confirm whether an `except HTTPException: raise` guard is wanted
        # (the metrics routes use one).
        logger.error(f"Ingestion failed: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
app/routes/metrics_routes copy.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Routes for aggregate metrics"""
|
| 2 |
+
from fastapi import APIRouter, HTTPException
|
| 3 |
+
from app.models.jira_schema import MetricsResponse
|
| 4 |
+
from app.services.vector_store import vector_store
|
| 5 |
+
from app.utils.logger import setup_logger
|
| 6 |
+
import pandas as pd
|
| 7 |
+
|
| 8 |
+
logger = setup_logger(__name__)
|
| 9 |
+
router = APIRouter()
|
| 10 |
+
|
| 11 |
+
@router.get("/metrics", response_model=MetricsResponse)
async def get_metrics():
    """
    Get aggregate metrics from Jira data

    - Average resolution time
    - Open/closed ticket counts
    - SLA compliance percentage

    Note: computed over a sample of at most 100 payloads, so counts are
    approximate when more tickets are indexed.
    """
    try:
        logger.info("Calculating metrics...")

        info = vector_store.get_collection_info()
        total_tickets = info.get('vectors_count', 0)
        if total_tickets == 0:
            raise HTTPException(status_code=404, detail="No data available. Please ingest data first.")

        # Pull a sample or all payloads from the sidecar store
        payloads = vector_store.get_payloads_sample(limit=100)
        if not payloads:
            raise HTTPException(status_code=404, detail="Unable to retrieve metrics data")

        # Calculate metrics — any status outside these two sets is counted
        # in neither bucket.
        open_statuses = {'Open', 'In Progress', 'To Do'}
        closed_statuses = {'Closed', 'Done', 'Resolved'}

        open_tickets = sum(1 for p in payloads if (p.get('status') or '') in open_statuses)
        closed_tickets = sum(1 for p in payloads if (p.get('status') or '') in closed_statuses)

        # Average resolution time (days)
        resolution_times = []
        for p in payloads:
            created = p.get('created_date')
            resolved = p.get('resolved_date')
            if created and resolved:
                try:
                    c = pd.to_datetime(created)
                    r = pd.to_datetime(resolved)
                    delta = (r - c).days
                    if delta >= 0:
                        resolution_times.append(delta)
                except Exception:
                    # Unparseable dates are simply excluded from the average.
                    pass

        avg_resolution = (sum(resolution_times) / len(resolution_times)) if resolution_times else 0.0
        avg_resolution_str = f"{avg_resolution:.1f} days"

        # SLA compliance: resolved within 5 days
        sla_threshold = 5
        sla_compliant = sum(1 for t in resolution_times if t <= sla_threshold)
        sla_pct = (sla_compliant / len(resolution_times) * 100) if resolution_times else 0.0
        sla_compliance_str = f"{sla_pct:.0f}%"

        return MetricsResponse(
            avg_resolution_time=avg_resolution_str,
            open_tickets=open_tickets,
            closed_tickets=closed_tickets,
            sla_compliance=sla_compliance_str,
            total_tickets=total_tickets
        )

    except HTTPException:
        # Let deliberate 404s pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Metrics calculation failed: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
app/routes/metrics_routes.py.qdrant
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Routes for aggregate metrics"""
|
| 2 |
+
from fastapi import APIRouter, HTTPException
|
| 3 |
+
from app.models.jira_schema import MetricsResponse
|
| 4 |
+
from app.services.vector_store import vector_store
|
| 5 |
+
from app.services.data_ingestion import DataIngestionService
|
| 6 |
+
from app.utils.logger import setup_logger
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
import pandas as pd
|
| 9 |
+
|
| 10 |
+
logger = setup_logger(__name__)
|
| 11 |
+
router = APIRouter()
|
| 12 |
+
|
| 13 |
+
@router.get("/metrics", response_model=MetricsResponse)
async def get_metrics():
    """
    Get aggregate metrics from Jira data

    - Average resolution time
    - Open/closed ticket counts
    - SLA compliance percentage
    """
    try:
        logger.info("Calculating metrics...")

        # Get collection info
        info = vector_store.get_collection_info()
        total_tickets = info.get('vectors_count', 0)

        if total_tickets == 0:
            raise HTTPException(status_code=404, detail="No data available. Please ingest data first.")

        # For MVP, retrieve sample of tickets to calculate metrics
        # In production, this would query all tickets or use aggregated stats
        from app.services.retriever import retriever
        sample_results = retriever.retrieve("all tickets", top_k=100)

        if not sample_results:
            raise HTTPException(status_code=404, detail="Unable to retrieve metrics data")

        payloads = [r['payload'] for r in sample_results]

        # Calculate metrics
        open_tickets = sum(1 for p in payloads if p.get('status') in ['Open', 'In Progress', 'To Do'])
        closed_tickets = sum(1 for p in payloads if p.get('status') in ['Closed', 'Done', 'Resolved'])

        # Calculate average resolution time
        resolution_times = []
        for p in payloads:
            if p.get('created_date') and p.get('resolved_date'):
                try:
                    created = pd.to_datetime(p['created_date'])
                    resolved = pd.to_datetime(p['resolved_date'])
                    delta = (resolved - created).days
                    if delta >= 0:
                        resolution_times.append(delta)
                # Fix: was a bare `except:`, which also swallows
                # KeyboardInterrupt/SystemExit; narrow to Exception so
                # only parse failures are skipped.
                except Exception:
                    pass

        avg_resolution = sum(resolution_times) / len(resolution_times) if resolution_times else 0
        avg_resolution_str = f"{avg_resolution:.1f} days"

        # Calculate SLA compliance (simplified: tickets resolved within 5 days)
        sla_threshold = 5
        sla_compliant = sum(1 for t in resolution_times if t <= sla_threshold)
        sla_compliance = (sla_compliant / len(resolution_times) * 100) if resolution_times else 0
        sla_compliance_str = f"{sla_compliance:.0f}%"

        return MetricsResponse(
            avg_resolution_time=avg_resolution_str,
            open_tickets=open_tickets,
            closed_tickets=closed_tickets,
            sla_compliance=sla_compliance_str,
            total_tickets=total_tickets
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Metrics calculation failed: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
app/services/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Business logic services"""
|
app/services/data_ingestion.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Data ingestion service for parsing Jira exports"""
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import json
|
| 4 |
+
from typing import List, Dict, Any
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from app.utils.logger import setup_logger
|
| 7 |
+
|
| 8 |
+
logger = setup_logger(__name__)
|
| 9 |
+
|
| 10 |
+
class DataIngestionService:
    """Handles parsing and preprocessing of Jira data files"""

    @staticmethod
    def parse_csv(file_path: str) -> List[Dict[str, Any]]:
        """Parse a Jira CSV export into cleaned record dicts."""
        try:
            df = pd.read_csv(file_path)
            logger.info(f"Loaded {len(df)} records from {file_path}")

            # Normalize column names, e.g. "Issue Type" -> "issue_type"
            df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')

            # Convert to dicts and clean each record
            records = df.to_dict('records')
            return [DataIngestionService._clean_record(r) for r in records]

        except Exception as e:
            logger.error(f"Error parsing CSV: {str(e)}")
            raise

    @staticmethod
    def parse_json(file_path: str) -> List[Dict[str, Any]]:
        """Parse a Jira JSON export: either {"issues": [...]} or a bare list."""
        try:
            # Fix: explicit encoding — relying on the platform default
            # breaks UTF-8 exports on Windows.
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            if isinstance(data, dict) and 'issues' in data:
                records = data['issues']
            elif isinstance(data, list):
                records = data
            else:
                raise ValueError("Unexpected JSON structure")

            logger.info(f"Loaded {len(records)} records from {file_path}")
            return [DataIngestionService._clean_record(r) for r in records]

        except Exception as e:
            logger.error(f"Error parsing JSON: {str(e)}")
            raise

    @staticmethod
    def _clean_record(record: Dict[str, Any]) -> Dict[str, Any]:
        """Clean and normalize a single record (mutates and returns it).

        Normalizes NaN / '' / 'None' scalar values to None and adds a
        'searchable_text' field used for embedding.
        """
        for key, value in record.items():
            # Fix: guard containers — pd.isna on a list/array returns an
            # array, which is ambiguous in boolean context and raised for
            # JSON records with list-valued fields (e.g. labels).
            if isinstance(value, (list, dict, tuple)):
                continue
            if pd.isna(value) or value == '' or value == 'None':
                record[key] = None

        # Create searchable text representation from the key text fields
        text_fields = ['summary', 'description', 'status', 'priority', 'project']
        text_parts = [
            f"{field}: {record[field]}"
            for field in text_fields
            if field in record and record[field]
        ]

        record['searchable_text'] = " | ".join(text_parts)
        return record

    @staticmethod
    def load_data(file_path: str) -> List[Dict[str, Any]]:
        """Load data from file, auto-detecting format by extension.

        Raises:
            ValueError: if the extension is neither .csv nor .json.
        """
        file_ext = Path(file_path).suffix.lower()

        if file_ext == '.csv':
            return DataIngestionService.parse_csv(file_path)
        elif file_ext == '.json':
            return DataIngestionService.parse_json(file_path)
        else:
            raise ValueError(f"Unsupported file format: {file_ext}")
|
app/services/embeddings.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Embedding generation service using sentence-transformers"""
|
| 2 |
+
from sentence_transformers import SentenceTransformer
|
| 3 |
+
from typing import List
|
| 4 |
+
import numpy as np
|
| 5 |
+
from app.config import settings
|
| 6 |
+
from app.utils.logger import setup_logger
|
| 7 |
+
|
| 8 |
+
logger = setup_logger(__name__)
|
| 9 |
+
|
| 10 |
+
class EmbeddingService:
    """Thin wrapper around a sentence-transformers model.

    Loads the model named in settings.EMBEDDING_MODEL once and exposes
    single-text and batched embedding as plain Python lists of floats.
    """

    def __init__(self):
        """Load the configured model and record its output dimension."""
        logger.info(f"Loading embedding model: {settings.EMBEDDING_MODEL}")
        self.model = SentenceTransformer(settings.EMBEDDING_MODEL)
        self.dimension = self.model.get_sentence_embedding_dimension()
        logger.info(f"Embedding dimension: {self.dimension}")

    def embed_text(self, text: str) -> List[float]:
        """Embed one text and return its vector as a list of floats."""
        vector = self.model.encode(text, convert_to_numpy=True)
        return vector.tolist()

    def embed_batch(self, texts: List[str], batch_size: int = 32) -> List[List[float]]:
        """Embed many texts at once; returns one vector per input text."""
        logger.info(f"Embedding {len(texts)} texts...")
        matrix = self.model.encode(
            texts,
            batch_size=batch_size,
            show_progress_bar=True,
            convert_to_numpy=True,
        )
        return matrix.tolist()

    def get_dimension(self) -> int:
        """Return the dimensionality of the embedding vectors."""
        return self.dimension

# Global instance
embedding_service = EmbeddingService()
|
app/services/generator.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LLM generation service using Hugging Face Inference API"""
|
| 2 |
+
import requests
|
| 3 |
+
from typing import Dict, Any, Optional
|
| 4 |
+
from app.config import settings
|
| 5 |
+
from app.utils.logger import setup_logger
|
| 6 |
+
|
| 7 |
+
logger = setup_logger(__name__)
|
| 8 |
+
|
| 9 |
+
class GeneratorService:
|
| 10 |
+
"""Handles text generation using Hugging Face models"""
|
| 11 |
+
|
| 12 |
+
    def __init__(self):
        """Configure the HF Inference API endpoint and auth header.

        Reads HF_API_URL and HF_TOKEN from settings; no network call is
        made until generate() is invoked.
        """
        self.api_url = settings.HF_API_URL
        self.headers = {"Authorization": f"Bearer {settings.HF_TOKEN}"}
|
| 15 |
+
|
| 16 |
+
def generate(
|
| 17 |
+
self,
|
| 18 |
+
prompt: str,
|
| 19 |
+
max_tokens: int = 512,
|
| 20 |
+
temperature: float = 0.7
|
| 21 |
+
) -> str:
|
| 22 |
+
"""Generate text using the LLM"""
|
| 23 |
+
payload = {
|
| 24 |
+
"inputs": prompt,
|
| 25 |
+
"parameters": {
|
| 26 |
+
"max_new_tokens": max_tokens,
|
| 27 |
+
"temperature": temperature,
|
| 28 |
+
"return_full_text": False
|
| 29 |
+
}
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
try:
|
| 33 |
+
logger.info("Calling Hugging Face API...")
|
| 34 |
+
response = requests.post(
|
| 35 |
+
self.api_url,
|
| 36 |
+
headers=self.headers,
|
| 37 |
+
json=payload,
|
| 38 |
+
timeout=30
|
| 39 |
+
)
|
| 40 |
+
response.raise_for_status()
|
| 41 |
+
|
| 42 |
+
result = response.json()
|
| 43 |
+
|
| 44 |
+
# Handle different response formats
|
| 45 |
+
if isinstance(result, list) and len(result) > 0:
|
| 46 |
+
generated_text = result[0].get('generated_text', '')
|
| 47 |
+
elif isinstance(result, dict):
|
| 48 |
+
generated_text = result.get('generated_text', '')
|
| 49 |
+
else:
|
| 50 |
+
generated_text = str(result)
|
| 51 |
+
|
| 52 |
+
logger.info("Generation successful")
|
| 53 |
+
return generated_text.strip()
|
| 54 |
+
|
| 55 |
+
except requests.exceptions.RequestException as e:
|
| 56 |
+
logger.error(f"API request failed: {str(e)}")
|
| 57 |
+
# Fallback to simple response
|
| 58 |
+
return self._fallback_response(prompt)
|
| 59 |
+
|
| 60 |
+
def _fallback_response(self, prompt: str) -> str:
|
| 61 |
+
"""Fallback response when API fails"""
|
| 62 |
+
return "I apologize, but I'm unable to generate a response at the moment. Please try again later."
|
| 63 |
+
|
| 64 |
+
def generate_rag_response(
|
| 65 |
+
self,
|
| 66 |
+
query: str,
|
| 67 |
+
context: str
|
| 68 |
+
) -> str:
|
| 69 |
+
"""Generate response using RAG pattern"""
|
| 70 |
+
prompt = self._build_rag_prompt(query, context)
|
| 71 |
+
return self.generate(prompt)
|
| 72 |
+
|
| 73 |
+
def _build_rag_prompt(self, query: str, context: str) -> str:
|
| 74 |
+
"""Build RAG prompt template"""
|
| 75 |
+
prompt = f"""<s>[INST] You are WorkWise, an AI assistant specialized in analyzing Jira project data. Answer the user's question based on the provided context.
|
| 76 |
+
|
| 77 |
+
Context:
|
| 78 |
+
{context}
|
| 79 |
+
|
| 80 |
+
User Question: {query}
|
| 81 |
+
|
| 82 |
+
Provide a clear, concise answer based on the context. If the context doesn't contain enough information, say so. [/INST]</s>
|
| 83 |
+
|
| 84 |
+
Answer:"""
|
| 85 |
+
return prompt
|
| 86 |
+
|
| 87 |
+
# Global instance
# Shared module-level singleton created at import time.
generator = GeneratorService()
|
app/services/retriever.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Retrieval service for semantic search"""
|
| 2 |
+
from typing import List, Dict, Any
|
| 3 |
+
from app.services.embeddings import embedding_service
|
| 4 |
+
from app.services.vector_store import vector_store
|
| 5 |
+
from app.config import settings
|
| 6 |
+
from app.utils.logger import setup_logger
|
| 7 |
+
|
| 8 |
+
# Module-level logger configured by app.utils.logger.setup_logger (stdout, INFO).
logger = setup_logger(__name__)
|
| 9 |
+
|
| 10 |
+
class RetrieverService:
    """Handles semantic search over the vector database.

    Embeds the query with the shared embedding service and searches the
    shared vector store, then formats hits into an LLM-ready context block.
    """

    def __init__(self):
        # Module-level singletons shared across the app.
        self.embedding_service = embedding_service
        self.vector_store = vector_store

    def retrieve(self, query: str, top_k: int = None) -> List[Dict[str, Any]]:
        """Retrieve relevant documents for a query.

        Args:
            query: Natural-language search query.
            top_k: Number of hits to return; defaults to settings.TOP_K.

        Returns:
            Search hits as dicts with 'id', 'score' and 'payload' keys.
        """
        if top_k is None:
            top_k = settings.TOP_K

        logger.info(f"Retrieving documents for query: {query}")
        query_embedding = self.embedding_service.embed_text(query)

        # Search the Faiss-backed vector store (cosine similarity).
        # (Dead commented-out Qdrant variant removed — it duplicated this
        # call verbatim; see app/services/vector_store.py.qdrant if needed.)
        results = self.vector_store.search(
            query_vector=query_embedding,
            limit=top_k,
            score_threshold=settings.SCORE_THRESHOLD
        )

        logger.info(f"Retrieved {len(results)} documents")
        return results

    def format_context(self, results: List[Dict[str, Any]]) -> str:
        """Format retrieved documents into a context string for the LLM prompt."""
        context_parts = []

        for idx, result in enumerate(results, 1):
            payload = result['payload']
            score = result['score']

            context_parts.append(f"[Document {idx}] (Relevance: {score:.2f})")
            context_parts.append(f"Ticket: {payload.get('ticket_id', 'N/A')}")
            context_parts.append(f"Project: {payload.get('project', 'N/A')}")
            context_parts.append(f"Status: {payload.get('status', 'N/A')}")
            context_parts.append(f"Priority: {payload.get('priority', 'N/A')}")
            context_parts.append(f"Summary: {payload.get('summary', 'N/A')}")
            if payload.get('description'):
                # Truncate long descriptions to keep the prompt compact.
                context_parts.append(f"Description: {payload['description'][:200]}...")
            context_parts.append("")

        return "\n".join(context_parts)
|
| 65 |
+
# Global instance
# Shared module-level singleton created at import time.
retriever = RetrieverService()
|
app/services/vector_store.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Faiss vector store service (replaces Qdrant)"""
|
| 2 |
+
from typing import List, Dict, Any, Optional
|
| 3 |
+
from app.config import settings
|
| 4 |
+
from app.utils.logger import setup_logger
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import json
|
| 8 |
+
import faiss
|
| 9 |
+
import numpy as np
|
| 10 |
+
|
| 11 |
+
# Module-level logger configured by app.utils.logger.setup_logger (stdout, INFO).
logger = setup_logger(__name__)
|
| 12 |
+
|
| 13 |
+
def _normalize(vectors: np.ndarray) -> np.ndarray:
|
| 14 |
+
"""L2-normalize vectors so inner product equals cosine similarity."""
|
| 15 |
+
norms = np.linalg.norm(vectors, axis=1, keepdims=True) + 1e-12
|
| 16 |
+
return vectors / norms
|
| 17 |
+
|
| 18 |
+
class VectorStoreService:
|
| 19 |
+
"""
|
| 20 |
+
Manages a Faiss index + sidecar payload store.
|
| 21 |
+
- Index: Faiss IndexFlatIP (cosine via normalization)
|
| 22 |
+
- Payloads: JSON list aligned to vector IDs
|
| 23 |
+
- Persistence: saves/loads index + payloads from disk
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
def __init__(self):
|
| 27 |
+
self.index: Optional[faiss.Index] = None
|
| 28 |
+
self.dimension: Optional[int] = None
|
| 29 |
+
self.payloads: List[Dict[str, Any]] = []
|
| 30 |
+
self.index_path = settings.FAISS_INDEX_PATH
|
| 31 |
+
self.payloads_path = settings.FAISS_PAYLOADS_PATH
|
| 32 |
+
|
| 33 |
+
self._load_if_exists()
|
| 34 |
+
|
| 35 |
+
# ---------- Persistence ----------
|
| 36 |
+
|
| 37 |
+
def _load_if_exists(self):
|
| 38 |
+
"""Load index + payloads if the files exist."""
|
| 39 |
+
if os.path.exists(self.index_path) and os.path.exists(self.payloads_path):
|
| 40 |
+
try:
|
| 41 |
+
self.index = faiss.read_index(self.index_path)
|
| 42 |
+
self.dimension = self.index.d # type: ignore[attr-defined]
|
| 43 |
+
with open(self.payloads_path, "r", encoding="utf-8") as f:
|
| 44 |
+
self.payloads = json.load(f)
|
| 45 |
+
logger.info(
|
| 46 |
+
f"Loaded Faiss index ({self.dimension}d) with {self.index.ntotal} vectors" # type: ignore
|
| 47 |
+
)
|
| 48 |
+
except Exception as e:
|
| 49 |
+
logger.error(f"Failed to load Faiss store; starting fresh. Error: {e}")
|
| 50 |
+
self.index = None
|
| 51 |
+
self.payloads = []
|
| 52 |
+
self.dimension = None
|
| 53 |
+
|
| 54 |
+
def _save(self):
|
| 55 |
+
"""Persist index + payloads to disk."""
|
| 56 |
+
if self.index is not None:
|
| 57 |
+
faiss.write_index(self.index, self.index_path)
|
| 58 |
+
with open(self.payloads_path, "w", encoding="utf-8") as f:
|
| 59 |
+
json.dump(self.payloads, f, ensure_ascii=False)
|
| 60 |
+
|
| 61 |
+
# ---------- Collection lifecycle ----------
|
| 62 |
+
|
| 63 |
+
def create_collection(self, vector_size: int):
|
| 64 |
+
"""
|
| 65 |
+
(Re)create a fresh Faiss index (cosine via normalized vectors).
|
| 66 |
+
WARNING: This clears existing data.
|
| 67 |
+
"""
|
| 68 |
+
self.dimension = vector_size
|
| 69 |
+
self.index = faiss.IndexFlatIP(vector_size) # inner product
|
| 70 |
+
self.payloads = []
|
| 71 |
+
self._save()
|
| 72 |
+
logger.info(f"Created Faiss collection: dim={vector_size}")
|
| 73 |
+
|
| 74 |
+
# ---------- Upsert/Search ----------
|
| 75 |
+
|
| 76 |
+
def upsert_vectors(
|
| 77 |
+
self,
|
| 78 |
+
vectors: List[List[float]],
|
| 79 |
+
payloads: List[Dict[str, Any]]
|
| 80 |
+
) -> int:
|
| 81 |
+
"""Insert vectors with metadata (IDs are implicit by order)."""
|
| 82 |
+
if self.index is None:
|
| 83 |
+
raise RuntimeError("Faiss index is not initialized. Call create_collection first.")
|
| 84 |
+
|
| 85 |
+
arr = np.array(vectors, dtype="float32")
|
| 86 |
+
arr = _normalize(arr)
|
| 87 |
+
self.index.add(arr) # type: ignore
|
| 88 |
+
self.payloads.extend(payloads)
|
| 89 |
+
|
| 90 |
+
self._save()
|
| 91 |
+
logger.info(f"Upserted {len(vectors)} vectors into Faiss")
|
| 92 |
+
return len(vectors)
|
| 93 |
+
|
| 94 |
+
def search(
|
| 95 |
+
self,
|
| 96 |
+
query_vector: List[float],
|
| 97 |
+
limit: int = 5,
|
| 98 |
+
score_threshold: float = 0.0
|
| 99 |
+
) -> List[Dict[str, Any]]:
|
| 100 |
+
"""Search similar vectors via inner product (cosine)."""
|
| 101 |
+
if self.index is None or self.index.ntotal == 0: # type: ignore
|
| 102 |
+
return []
|
| 103 |
+
|
| 104 |
+
q = np.array([query_vector], dtype="float32")
|
| 105 |
+
q = _normalize(q)
|
| 106 |
+
scores, indices = self.index.search(q, limit) # type: ignore
|
| 107 |
+
scores = scores[0].tolist()
|
| 108 |
+
indices = indices[0].tolist()
|
| 109 |
+
|
| 110 |
+
results: List[Dict[str, Any]] = []
|
| 111 |
+
for score, idx in zip(scores, indices):
|
| 112 |
+
if idx == -1:
|
| 113 |
+
continue
|
| 114 |
+
if score < score_threshold:
|
| 115 |
+
continue
|
| 116 |
+
payload = self.payloads[idx] if 0 <= idx < len(self.payloads) else {}
|
| 117 |
+
results.append({
|
| 118 |
+
"id": idx,
|
| 119 |
+
"score": float(score),
|
| 120 |
+
"payload": payload
|
| 121 |
+
})
|
| 122 |
+
return results
|
| 123 |
+
|
| 124 |
+
# ---------- Introspection/Access ----------
|
| 125 |
+
|
| 126 |
+
def get_collection_info(self) -> Dict[str, Any]:
|
| 127 |
+
count = int(self.index.ntotal) if self.index is not None else 0 # type: ignore
|
| 128 |
+
return {
|
| 129 |
+
"vectors_count": count,
|
| 130 |
+
"status": "ready" if count >= 0 else "uninitialized"
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
def get_all_payloads(self) -> List[Dict[str, Any]]:
|
| 134 |
+
"""Return all payloads (used by metrics)."""
|
| 135 |
+
return list(self.payloads)
|
| 136 |
+
|
| 137 |
+
def get_payloads_sample(self, limit: int = 100) -> List[Dict[str, Any]]:
|
| 138 |
+
return self.payloads[:limit]
|
| 139 |
+
|
| 140 |
+
# Global instance
# Shared module-level singleton; loads any persisted index at import time.
vector_store = VectorStoreService()
|
app/services/vector_store.py.qdrant
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Qdrant vector store service"""
|
| 2 |
+
from qdrant_client import QdrantClient
|
| 3 |
+
from qdrant_client.http import models
|
| 4 |
+
from typing import List, Dict, Any
|
| 5 |
+
from app.config import settings
|
| 6 |
+
from app.utils.logger import setup_logger
|
| 7 |
+
|
| 8 |
+
logger = setup_logger(__name__)
|
| 9 |
+
|
| 10 |
+
class VectorStoreService:
    """Manages Qdrant vector database operations.

    Wraps a QdrantClient configured from app settings and exposes
    collection lifecycle, upsert and similarity-search helpers.
    """

    def __init__(self):
        """Initialize Qdrant client from settings (URL, optional API key)."""
        logger.info(f"Connecting to Qdrant at {settings.QDRANT_URL}")
        self.client = QdrantClient(
            url=settings.QDRANT_URL,
            api_key=settings.QDRANT_API_KEY if settings.QDRANT_API_KEY else None
        )
        self.collection_name = settings.QDRANT_COLLECTION_NAME

    def create_collection(self, vector_size: int):
        """Create or recreate the collection (cosine distance).

        WARNING: drops any existing collection of the same name.
        """
        try:
            # Delete if exists
            self.client.delete_collection(collection_name=self.collection_name)
            logger.info(f"Deleted existing collection: {self.collection_name}")
        except Exception:
            # BUGFIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit. Delete legitimately fails when
            # the collection does not exist yet, so we ignore it.
            pass

        # Create new collection
        self.client.create_collection(
            collection_name=self.collection_name,
            vectors_config=models.VectorParams(
                size=vector_size,
                distance=models.Distance.COSINE
            )
        )
        logger.info(f"Created collection: {self.collection_name}")

    def upsert_vectors(
        self,
        vectors: List[List[float]],
        payloads: List[Dict[str, Any]]
    ) -> int:
        """Insert vectors with metadata; point IDs are positional indices.

        Returns the number of points written.
        """
        points = [
            models.PointStruct(
                id=idx,
                vector=vector,
                payload=payload
            )
            for idx, (vector, payload) in enumerate(zip(vectors, payloads))
        ]

        self.client.upsert(
            collection_name=self.collection_name,
            points=points
        )

        logger.info(f"Upserted {len(points)} vectors")
        return len(points)

    def search(
        self,
        query_vector: List[float],
        limit: int = 5,
        score_threshold: float = 0.5
    ) -> List[Dict[str, Any]]:
        """Search for similar vectors; returns dicts with 'id', 'score', 'payload'."""
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=limit,
            score_threshold=score_threshold
        )

        return [
            {
                "id": result.id,
                "score": result.score,
                "payload": result.payload
            }
            for result in results
        ]

    def get_collection_info(self) -> Dict[str, Any]:
        """Get collection statistics (vector count and status)."""
        info = self.client.get_collection(collection_name=self.collection_name)
        return {
            "vectors_count": info.vectors_count,
            "status": info.status
        }
|
| 95 |
+
# Global instance
# Shared module-level singleton; connects to Qdrant at import time.
vector_store = VectorStoreService()
|
app/utils/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Utility modules"""
|
app/utils/logger.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Logging configuration"""
|
| 2 |
+
import logging
|
| 3 |
+
import sys
|
| 4 |
+
|
| 5 |
+
def setup_logger(name: str) -> logging.Logger:
    """Return a logger emitting INFO-level records to stdout.

    The stream handler is attached only the first time a given name is
    requested, so repeated calls never duplicate output lines.
    """
    log = logging.getLogger(name)
    log.setLevel(logging.INFO)

    if not log.handlers:
        stream_handler = logging.StreamHandler(sys.stdout)
        stream_handler.setLevel(logging.INFO)
        stream_handler.setFormatter(
            logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )
        log.addHandler(stream_handler)

    return log
|
app/utils/response_builder.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Helper functions for building API responses"""
|
| 2 |
+
from typing import Optional, List, Dict, Any
|
| 3 |
+
from app.models.jira_schema import QueryResponse, ChartData
|
| 4 |
+
|
| 5 |
+
def build_query_response(
    answer: str,
    chart_type: Optional[str] = None,
    chart_data: Optional[List[Dict]] = None,
    sources: Optional[List[str]] = None
) -> QueryResponse:
    """Assemble a structured QueryResponse.

    A ChartData object is attached only when both a chart type and its
    data rows are supplied; otherwise the chart field stays None.
    """
    chart = ChartData(type=chart_type, data=chart_data) if chart_type and chart_data else None
    return QueryResponse(answer=answer, chart=chart, sources=sources)
|
| 21 |
+
|
| 22 |
+
def extract_chart_intent(query: str) -> Optional[str]:
    """Return the chart type implied by *query*, or None for a text-only answer."""
    keyword_map = {
        "bar": ["compare", "by project", "breakdown", "distribution"],
        "line": ["trend", "over time", "timeline", "progress"],
        "pie": ["percentage", "proportion", "share"]
    }

    lowered = query.lower()
    for kind, triggers in keyword_map.items():
        for trigger in triggers:
            if trigger in lowered:
                return kind

    return None
|
app/utils/vector_store.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Qdrant vector store service"""
|
| 2 |
+
from qdrant_client import QdrantClient
|
| 3 |
+
from qdrant_client.http import models
|
| 4 |
+
from typing import List, Dict, Any
|
| 5 |
+
from app.config import settings
|
| 6 |
+
from app.utils.logger import setup_logger
|
| 7 |
+
|
| 8 |
+
logger = setup_logger(__name__)
|
| 9 |
+
|
| 10 |
+
class VectorStoreService:
    """Manages Qdrant vector database operations.

    NOTE(review): this module duplicates app/services/vector_store.py.qdrant
    verbatim — consider consolidating to a single copy.
    """

    def __init__(self):
        """Initialize Qdrant client from settings (URL, optional API key)."""
        logger.info(f"Connecting to Qdrant at {settings.QDRANT_URL}")
        self.client = QdrantClient(
            url=settings.QDRANT_URL,
            api_key=settings.QDRANT_API_KEY if settings.QDRANT_API_KEY else None
        )
        self.collection_name = settings.QDRANT_COLLECTION_NAME

    def create_collection(self, vector_size: int):
        """Create or recreate the collection (cosine distance).

        WARNING: drops any existing collection of the same name.
        """
        try:
            # Delete if exists
            self.client.delete_collection(collection_name=self.collection_name)
            logger.info(f"Deleted existing collection: {self.collection_name}")
        except Exception:
            # BUGFIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit. Delete legitimately fails when
            # the collection does not exist yet, so we ignore it.
            pass

        # Create new collection
        self.client.create_collection(
            collection_name=self.collection_name,
            vectors_config=models.VectorParams(
                size=vector_size,
                distance=models.Distance.COSINE
            )
        )
        logger.info(f"Created collection: {self.collection_name}")

    def upsert_vectors(
        self,
        vectors: List[List[float]],
        payloads: List[Dict[str, Any]]
    ) -> int:
        """Insert vectors with metadata; point IDs are positional indices.

        Returns the number of points written.
        """
        points = [
            models.PointStruct(
                id=idx,
                vector=vector,
                payload=payload
            )
            for idx, (vector, payload) in enumerate(zip(vectors, payloads))
        ]

        self.client.upsert(
            collection_name=self.collection_name,
            points=points
        )

        logger.info(f"Upserted {len(points)} vectors")
        return len(points)

    def search(
        self,
        query_vector: List[float],
        limit: int = 5,
        score_threshold: float = 0.5
    ) -> List[Dict[str, Any]]:
        """Search for similar vectors; returns dicts with 'id', 'score', 'payload'."""
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=limit,
            score_threshold=score_threshold
        )

        return [
            {
                "id": result.id,
                "score": result.score,
                "payload": result.payload
            }
            for result in results
        ]

    def get_collection_info(self) -> Dict[str, Any]:
        """Get collection statistics (vector count and status)."""
        info = self.client.get_collection(collection_name=self.collection_name)
        return {
            "vectors_count": info.vectors_count,
            "status": info.status
        }
|
| 95 |
+
# Global instance
# Shared module-level singleton; connects to Qdrant at import time.
vector_store = VectorStoreService()
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.109.0
|
| 2 |
+
uvicorn[standard]==0.27.0
|
| 3 |
+
python-dotenv==1.0.0
|
| 4 |
+
pandas==2.2.0
|
| 5 |
+
numpy==1.26.3
|
| 6 |
+
sentence-transformers==2.3.1
|
| 7 |
+
faiss-cpu==1.7.4
|
| 8 |
+
qdrant-client==1.7.3
|
| 9 |
+
pydantic==2.5.3
|
| 10 |
+
python-multipart==0.0.6
|
| 11 |
+
requests==2.31.0
|