File size: 3,994 Bytes
dcc24f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""
Integration Tests for FinEE Extractor (Pipeline).

Tests the full 5-tier pipeline including:
- Additive merge (Regex + Rules + LLM)
- Graceful degradation (No backend)
- Mocked LLM responses
"""

import pytest
from unittest.mock import MagicMock, patch
from finee.extractor import FinEE, ExtractionConfig
from finee.schema import ExtractionResult, TransactionType, Category, ExtractionSource
from finee.backends import BaseBackend

# Mock Backend for testing Tier 3
class MockBackend(BaseBackend):
    """Canned-response backend used in place of a real LLM in these tests.

    ``generate`` looks for a keyword in the prompt and returns a fixed
    string for it, so tests can assert on exact LLM-sourced values.
    """

    def is_available(self):
        """Always report the backend as available."""
        return True

    def load_model(self, path=None):
        """Mark the backend as loaded and report success; ``path`` is ignored."""
        self._loaded = True
        return True

    def generate(self, prompt, **kwargs):
        """Return the canned reply for the first keyword found in ``prompt``.

        Matching is case-insensitive. Keywords are checked in the order
        merchant, category, date; an empty string is returned when none
        of them occurs in the prompt. Extra keyword arguments are ignored.
        """
        lowered = prompt.lower()
        canned_replies = (
            ("merchant", "Uber"),
            ("category", "transport"),
            ("date", "01-01-2025"),
        )
        for keyword, reply in canned_replies:
            if keyword in lowered:
                return reply
        return ""

@pytest.fixture
def extractor_no_llm():
    """Provide a FinEE extractor built with ``use_llm=False`` (Regex + Rules only)."""
    return FinEE(ExtractionConfig(use_llm=False))

@pytest.fixture
def extractor_with_mock_llm():
    """Provide a FinEE extractor with ``use_llm=True`` backed by MockBackend.

    The extractor's private ``_backend`` and ``_backend_loaded`` attributes
    are overwritten directly so the mock is used for the LLM tier.
    """
    finee = FinEE(ExtractionConfig(use_llm=True))
    # Inject the mock and flag the backend as already loaded
    # (presumably this stops FinEE from loading a real backend -- confirm).
    finee._backend, finee._backend_loaded = MockBackend(), True
    return finee

def test_tier1_regex_only(extractor_no_llm):
    """Amount, type and account are extracted with the LLM disabled (Tier 1)."""
    parsed = extractor_no_llm.extract("Rs.500.00 debited from A/c 1234")

    assert parsed.amount == 500.0
    assert parsed.type == TransactionType.DEBIT
    assert parsed.account == "1234"
    # Any strictly positive confidence is acceptable here.
    assert parsed.confidence_score > 0.0

def test_tier2_rules_enrichment(extractor_no_llm):
    """A VPA is enriched into merchant and category with the LLM disabled (Tier 2)."""
    parsed = extractor_no_llm.extract("Rs.250 paid to swiggy@ybl")

    assert parsed.amount == 250.0
    assert parsed.vpa == "swiggy@ybl"
    # The next two fields are expected to come from the Tier 2 rules.
    assert parsed.merchant == "Swiggy"
    assert parsed.category == Category.FOOD

def test_tier3_additive_merge(extractor_with_mock_llm):
    """Fields from different tiers are merged into one result.

    Expected split:
    - amount comes from Tier 1 (regex)
    - merchant and category come from Tier 3 (the mock LLM)
    """
    # The mock LLM answers "Uber" / "transport" for merchant / category prompts.
    merged = extractor_with_mock_llm.extract("Rs.500 paid for taxi ride")

    # Values: one regex-derived field and two LLM-derived fields.
    assert merged.amount == 500.0
    assert merged.merchant == "Uber"
    assert merged.category == Category.TRANSPORT

    # Provenance recorded for each field.
    assert merged.meta['amount'].source == ExtractionSource.REGEX
    assert merged.meta['merchant'].source == ExtractionSource.LLM

def test_graceful_degradation():
    """Extraction still succeeds when the LLM is enabled but no backend exists."""
    finee = FinEE(ExtractionConfig(use_llm=True))

    # Simulate "no backends installed": backend is None but flagged as loaded.
    finee._backend, finee._backend_loaded = None, True

    # Reaching the assertions at all shows extract() did not raise.
    parsed = finee.extract("Rs.100 debited")

    # Regex-derived fields must still be populated.
    assert parsed.amount == 100.0
    assert parsed.type == TransactionType.DEBIT

def test_full_pipeline_consistency():
    """A fully specified message yields a complete result without the LLM."""
    # Every field in this message is expected to be reachable by Regex + Rules.
    message = "Rs.2500 debited from HDFC A/c 1234 to swiggy@ybl on 01-01-2025"

    # LLM disabled, so only the deterministic tiers run.
    finee = FinEE(ExtractionConfig(use_llm=False))
    parsed = finee.extract(message)

    assert parsed.is_complete()
    assert parsed.amount == 2500.0
    assert parsed.merchant == "Swiggy"
    assert parsed.category == Category.FOOD
    assert parsed.date == "01-01-2025"