File size: 2,098 Bytes
4d592a4
4ba4b25
 
4d592a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from typing import List
from backend.app.models.schemas import Source
from backend.app.services.embeddings import embedding_service


class ConfidenceService:
    def calculate_confidence(
        self, query: str, answer: str, sources: List[Source]
    ) -> float:
        factors = {}

        # Factor 1: Source count (25%)
        source_count = len(sources)
        if source_count == 0:
            factors["source_count"] = 0
        elif source_count == 1:
            factors["source_count"] = 15
        elif source_count == 2:
            factors["source_count"] = 20
        else:
            factors["source_count"] = 25

        # Factor 2: Average relevance score (30%)
        if sources:
            avg_relevance = sum(s.relevance_score or 0 for s in sources) / len(sources)
            factors["source_relevance"] = avg_relevance * 30
        else:
            factors["source_relevance"] = 0

        # Factor 3: Semantic similarity (30%)
        try:
            query_emb = embedding_service.embed_text(query)
            answer_text = answer[:1000] if len(answer) > 1000 else answer
            answer_emb = embedding_service.embed_text(answer_text)
            similarity = embedding_service.calculate_similarity(query_emb, answer_emb)
            factors["semantic_similarity"] = similarity * 30
        except Exception:
            factors["semantic_similarity"] = 0

        # Factor 4: Citation density (15%)
        citation_count = answer.count("[Source")
        if citation_count == 0:
            factors["citation_density"] = 0
        elif citation_count == 1:
            factors["citation_density"] = 10
        elif citation_count == 2:
            factors["citation_density"] = 13
        else:
            factors["citation_density"] = 15

        # Calculate total
        total_confidence = sum(factors.values())

        # Ensure minimum confidence for valid responses
        if source_count > 0 and total_confidence < 30:
            total_confidence = 35

        return round(min(total_confidence, 100), 2)


confidence_service = ConfidenceService()