"""
FinEE Confidence - Scoring logic for extraction results.

Calculates confidence scores based on:
- Source of extraction (regex > rules > LLM)
- Completeness of fields
- Consistency between sources
"""

from typing import Dict, List, Optional
from .schema import ExtractionResult, Confidence, ExtractionSource, FieldMeta


# Field weights for confidence calculation
FIELD_WEIGHTS = {
    'amount': 0.25,       # Critical field
    'type': 0.15,         # Critical field
    'date': 0.15,
    'account': 0.10,
    'reference': 0.10,
    'merchant': 0.10,
    'category': 0.10,
    'vpa': 0.05,
}
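# The weights sum to 1.0, so a result with every field present scores a
# straight weighted average of its per-field source scores.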

# Source reliability scores
SOURCE_SCORES = {
    ExtractionSource.REGEX: 0.95,
    ExtractionSource.RULES: 0.85,
    ExtractionSource.LLM: 0.70,
    ExtractionSource.CACHE: 1.0,  # Cached results are already validated
}
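# Illustrative arithmetic: a regex-sourced amount contributes
# 0.25 * 0.95 = 0.2375 to the weighted sum before normalisation.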


def calculate_confidence_score(result: ExtractionResult) -> float:
    """
    Calculate overall confidence score (0.0 to 1.0).
    
    Args:
        result: Extraction result with metadata
        
    Returns:
        Confidence score between 0.0 and 1.0
    """
    if result is None:
        return 0.0
    
    total_weight = 0.0
    weighted_score = 0.0
    
    for field_name, weight in FIELD_WEIGHTS.items():
        value = getattr(result, field_name, None)
        
        if value is not None:
            # Get source-based score
            if field_name in result.meta:
                source = result.meta[field_name].source
                field_score = SOURCE_SCORES.get(source, 0.5)
                
                # Apply the field-specific confidence multiplier when
                # present (test against None so an explicit 0.0 counts)
                if result.meta[field_name].confidence is not None:
                    field_score *= result.meta[field_name].confidence
            else:
                # Default score for fields without metadata
                field_score = 0.5
            
            weighted_score += weight * field_score
            total_weight += weight
    
    if total_weight == 0:
        return 0.0
    
    return weighted_score / total_weight


def calculate_completeness(result: ExtractionResult,
                           required: Optional[List[str]] = None,
                           desired: Optional[List[str]] = None) -> float:
    """
    Calculate field completeness score.
    
    Args:
        result: Extraction result
        required: List of required field names
        desired: List of desired field names
        
    Returns:
        Completeness score (0.0 to 1.0)
    """
    if required is None:
        required = ['amount', 'type']
    if desired is None:
        desired = ['merchant', 'category', 'date', 'reference']
    
    required_score = 0.0
    for field in required:
        if getattr(result, field, None) is not None:
            required_score += 1.0
    if required:
        required_score /= len(required)

    desired_score = 0.0
    for field in desired:
        if getattr(result, field, None) is not None:
            desired_score += 1.0
    if desired:
        desired_score /= len(desired)
    
    # Required fields are weighted more heavily
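    # Illustrative numbers: both required fields present plus 2 of 4
    # desired fields gives 0.7 * 1.0 + 0.3 * 0.5 = 0.85.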
    return 0.7 * required_score + 0.3 * desired_score


def determine_confidence_level(score: float,
                               high_threshold: float = 0.9,
                               medium_threshold: float = 0.7) -> Confidence:
    """
    Determine confidence level from score.
    
    Args:
        score: Confidence score (0.0 to 1.0)
        high_threshold: Threshold for HIGH confidence
        medium_threshold: Threshold for MEDIUM confidence
        
    Returns:
        Confidence enum value
    """
    if score >= high_threshold:
        return Confidence.HIGH
    elif score >= medium_threshold:
        return Confidence.MEDIUM
    elif score > 0:
        return Confidence.LOW
    else:
        return Confidence.FAILED


def update_result_confidence(result: ExtractionResult,
                             high_threshold: float = 0.9,
                             medium_threshold: float = 0.7) -> ExtractionResult:
    """
    Update the confidence fields on an ExtractionResult.
    
    Args:
        result: Extraction result to update
        high_threshold: Threshold for HIGH confidence
        medium_threshold: Threshold for MEDIUM confidence
        
    Returns:
        Updated ExtractionResult
    """
    # Calculate score
    score = calculate_confidence_score(result)
    
    # Factor in completeness
    completeness = calculate_completeness(result)
    combined_score = 0.7 * score + 0.3 * completeness
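    # Illustrative numbers: score=0.85, completeness=0.75 gives
    # 0.7 * 0.85 + 0.3 * 0.75 = 0.82 -> MEDIUM under the default
    # 0.9 / 0.7 thresholds.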
    
    # Update result
    result.confidence_score = combined_score
    result.confidence = determine_confidence_level(
        combined_score, 
        high_threshold, 
        medium_threshold
    )
    
    return result


def should_use_llm(result: ExtractionResult,
                   required: Optional[List[str]] = None,
                   desired: Optional[List[str]] = None) -> bool:
    """
    Determine if LLM should be used for additional extraction.
    
    Args:
        result: Current extraction result
        required: Required fields
        desired: Desired fields
        
    Returns:
        True if LLM extraction is recommended
    """
    missing = result.get_missing_fields(required, desired)
    
    # Always use LLM if required fields are missing
    if required:
        for field in required:
            if field in missing:
                return True
    
    # Use LLM if more than half of desired fields are missing
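    # e.g. 3 of 4 desired fields missing: 3 > 4 / 2, so recommend the LLM.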
    if desired:
        missing_desired = [f for f in missing if f in desired]
        if len(missing_desired) > len(desired) / 2:
            return True
    
    return False


def get_extraction_summary(result: ExtractionResult) -> Dict[str, str]:
    """
    Get a summary of extraction sources for each field.
    
    Args:
        result: Extraction result
        
    Returns:
        Dict mapping field names to source descriptions
    """
    summary = {}
    
    for field_name in FIELD_WEIGHTS.keys():
        value = getattr(result, field_name, None)
        
        if value is not None:
            if field_name in result.meta:
                source = result.meta[field_name].source.value
                conf = result.meta[field_name].confidence
                summary[field_name] = f"{source} ({conf:.0%})"
            else:
                summary[field_name] = "unknown"
        else:
            summary[field_name] = "missing"
    
    return summary
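

if __name__ == "__main__":
    # Minimal smoke-test sketch. Building a full ExtractionResult depends
    # on the .schema module (not shown here), so this only exercises the
    # score -> level mapping, which needs no fixtures. Because of the
    # relative import above, run it as a module: python -m <package>.confidence
    for sample_score in (0.95, 0.80, 0.40, 0.0):
        print(f"{sample_score:.2f} -> {determine_confidence_level(sample_score)}")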