"""
RAGAS API Endpoints
"""
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import List

router = APIRouter()

# Module-level singletons, injected from main.py via init_ragas_router()
ragas_pipeline = None
ragas_evaluator = None


class RagasEvalRequest(BaseModel):
    """Direct evaluation request."""
    query: str
    answer: str
    contexts: List[str]  # retrieved passages the answer was grounded on


class RagasQueryRequest(BaseModel):
    """Query + evaluate request."""
    query: str
    top_k: int = 3  # number of context chunks to retrieve


def init_ragas_router(pipeline, evaluator):
    """Initialize router with pipeline and evaluator instances."""
    global ragas_pipeline, ragas_evaluator
    ragas_pipeline = pipeline
    ragas_evaluator = evaluator
    return router
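
# A minimal wiring sketch (assumed main.py shape; the module name and the
# `build_pipeline` / `build_evaluator` factories are hypothetical, not part
# of this file):
#
#   from fastapi import FastAPI
#   import ragas_routes
#
#   app = FastAPI()
#   app.include_router(
#       ragas_routes.init_ragas_router(build_pipeline(), build_evaluator()),
#       prefix="/ragas",
#   )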


@router.post("/evaluate")
async def evaluate_direct(request: RagasEvalRequest):
    """
    Evaluate a query-answer pair directly.
    
    Use this when you already have the answer and contexts.
    """
    if not ragas_evaluator:
        raise HTTPException(status_code=503, detail="RAGAS not initialized")
    
    result = await ragas_evaluator.evaluate_single(
        query=request.query,
        answer=request.answer,
        contexts=request.contexts
    )
    
    return {
        "eval_id": result.eval_id,
        "faithfulness": result.faithfulness,
        "context_precision": result.context_precision,
        "ragas_score": result.ragas_score,
        "latency_ms": round(result.latency_ms, 2)
    }
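
# Example request (illustrative; assumes the router is mounted at /ragas):
#
#   curl -X POST http://localhost:8000/ragas/evaluate \
#     -H "Content-Type: application/json" \
#     -d '{"query": "What is RAGAS?",
#          "answer": "RAGAS is a framework for evaluating RAG pipelines.",
#          "contexts": ["RAGAS scores RAG outputs on metrics such as faithfulness."]}'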


@router.post("/query-and-evaluate")
async def query_and_evaluate(request: RagasQueryRequest):
    """
    Query the RAG system AND evaluate the response.
    
    Returns both the answer and RAGAS metrics.
    """
    if not ragas_pipeline or not ragas_evaluator:
        raise HTTPException(status_code=503, detail="RAGAS not initialized")
    
    # Step 1: Query pipeline
    response = ragas_pipeline.query_for_evaluation(
        query=request.query,
        top_k=request.top_k
    )
    
    if response.status != "success":
        return {
            "query": response.query,
            "answer": response.answer,
            "status": response.status,
            "ragas": None
        }
    
    # Step 2: Evaluate with RAGAS
    eval_result = await ragas_evaluator.evaluate_single(
        query=response.query,
        answer=response.answer,
        contexts=response.contexts
    )
    
    return {
        "query": response.query,
        "answer": response.answer,
        "sources": response.sources,
        "chunks_used": response.chunks_used,
        "response_time_ms": round(response.response_time_ms, 2),
        "ragas": {
            "eval_id": eval_result.eval_id,
            "faithfulness": eval_result.faithfulness,
            "context_precision": eval_result.context_precision,
            "ragas_score": eval_result.ragas_score,
            "eval_time_ms": round(eval_result.latency_ms, 2)
        }
    }
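
# Example request (illustrative; assumes the same /ragas mount prefix):
#
#   curl -X POST http://localhost:8000/ragas/query-and-evaluate \
#     -H "Content-Type: application/json" \
#     -d '{"query": "What does RAGAS measure?", "top_k": 3}'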


@router.get("/metrics")
async def get_metrics():
    """Get aggregate RAGAS metrics from all evaluations."""
    if not ragas_evaluator:
        raise HTTPException(status_code=503, detail="RAGAS not initialized")
    
    results = ragas_evaluator.results
    
    if not results:
        return {"total_evaluations": 0, "message": "No evaluations yet"}
    
    # Calculate averages
    avg_faithfulness = sum(r.faithfulness for r in results) / len(results)
    avg_precision = sum(r.context_precision for r in results) / len(results)
    avg_ragas = sum(r.ragas_score for r in results) / len(results)
    
    return {
        "total_evaluations": len(results),
        "avg_faithfulness": round(avg_faithfulness, 3),
        "avg_context_precision": round(avg_precision, 3),
        "avg_ragas_score": round(avg_ragas, 3)
    }
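

# Example (illustrative; assumes the /ragas mount prefix used above):
#
#   curl http://localhost:8000/ragas/metrics
#   # -> {"total_evaluations": ..., "avg_faithfulness": ...,
#   #     "avg_context_precision": ..., "avg_ragas_score": ...}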