|
|
"""
Integration tests for the FinEE extractor pipeline.

Tests the full 5-tier pipeline, including:
- Additive merge (Regex + Rules + LLM)
- Graceful degradation (no backend)
- Mocked LLM responses
"""
|
|
|
|
|
import pytest |
|
|
from unittest.mock import MagicMock, patch |
|
|
from finee.extractor import FinEE, ExtractionConfig |
|
|
from finee.schema import ExtractionResult, TransactionType, Category, ExtractionSource |
|
|
from finee.backends import BaseBackend |
|
|
|
|
|
|
|
|
class MockBackend(BaseBackend):
    """Deterministic stand-in for an LLM backend, used by the pipeline tests.

    Every method answers immediately with a canned value.
    """

    def is_available(self):
        """Report the mock backend as always usable."""
        return True

    def load_model(self, path=None):
        """Set the ``_loaded`` flag and return True; ``path`` is ignored."""
        self._loaded = True
        return True

    def generate(self, prompt, **kwargs):
        """Return a canned answer selected by a keyword found in ``prompt``.

        Keywords are matched case-insensitively and checked in the order
        merchant, category, date; the first match wins. An empty string is
        returned when none of them appears. Extra keyword arguments are
        accepted and ignored.
        """
        lowered = prompt.lower()
        # Order matters: a prompt mentioning several keywords gets the
        # answer for the earliest entry in this table.
        canned_answers = (
            ("merchant", "Uber"),
            ("category", "transport"),
            ("date", "01-01-2025"),
        )
        for keyword, answer in canned_answers:
            if keyword in lowered:
                return answer
        return ""
|
|
|
|
|
@pytest.fixture
def extractor_no_llm():
    """Provide a FinEE extractor built with ``use_llm=False``.

    Intended for tests that exercise only the regex and rules stages.
    """
    return FinEE(ExtractionConfig(use_llm=False))
|
|
|
|
|
@pytest.fixture
def extractor_with_mock_llm():
    """Provide a FinEE extractor (``use_llm=True``) backed by ``MockBackend``.

    Intended for tests that run the full pipeline against canned LLM output.
    """
    pipeline = FinEE(ExtractionConfig(use_llm=True))
    # Inject the mock through the private attributes.
    # NOTE(review): presumably marking the backend as loaded stops FinEE
    # from trying to load a real one -- confirm against FinEE.
    pipeline._backend = MockBackend()
    pipeline._backend_loaded = True
    return pipeline
|
|
|
|
|
def test_tier1_regex_only(extractor_no_llm):
    """Tier 1: amount, type and account are extracted with the LLM disabled."""
    message = "Rs.500.00 debited from A/c 1234"

    extracted = extractor_no_llm.extract(message)

    assert extracted.amount == 500.0
    assert extracted.type == TransactionType.DEBIT
    assert extracted.account == "1234"
    # Only checks that some confidence was assigned, not its exact value.
    assert extracted.confidence_score > 0.0
|
|
|
|
|
def test_tier2_rules_enrichment(extractor_no_llm):
    """Tier 2: a VPA is enriched into merchant and category without the LLM."""
    message = "Rs.250 paid to swiggy@ybl"

    extracted = extractor_no_llm.extract(message)

    assert extracted.amount == 250.0
    assert extracted.vpa == "swiggy@ybl"
    # Merchant and category are expected to be derived from the VPA.
    assert extracted.merchant == "Swiggy"
    assert extracted.category == Category.FOOD
|
|
|
|
|
def test_tier3_additive_merge(extractor_with_mock_llm):
    """Additive merge: the regex stage and the mock LLM each supply fields.

    - Tier 1 is expected to supply the amount.
    - Tier 3 (mock LLM) is expected to supply merchant and category.
    """
    message = "Rs.500 paid for taxi ride"

    extracted = extractor_with_mock_llm.extract(message)

    # Field values.
    assert extracted.amount == 500.0
    assert extracted.merchant == "Uber"
    assert extracted.category == Category.TRANSPORT

    # Provenance: each field records the stage that produced it.
    field_meta = extracted.meta
    assert field_meta["amount"].source == ExtractionSource.REGEX
    assert field_meta["merchant"].source == ExtractionSource.LLM
|
|
|
|
|
def test_graceful_degradation():
    """The pipeline still yields amount and type when no LLM backend exists."""
    pipeline = FinEE(ExtractionConfig(use_llm=True))

    # Simulate a missing backend: nothing to call, but flagged as loaded.
    # NOTE(review): presumably the flag stops FinEE from attempting a real
    # load -- confirm against FinEE.
    pipeline._backend = None
    pipeline._backend_loaded = True

    extracted = pipeline.extract("Rs.100 debited")

    assert extracted.amount == 100.0
    assert extracted.type == TransactionType.DEBIT
|
|
|
|
|
|
|
|
def test_full_pipeline_consistency():
    """A message carrying every field is fully extracted with the LLM off."""
    pipeline = FinEE(ExtractionConfig(use_llm=False))
    message = "Rs.2500 debited from HDFC A/c 1234 to swiggy@ybl on 01-01-2025"

    extracted = pipeline.extract(message)

    assert extracted.is_complete()
    assert extracted.amount == 2500.0
    assert extracted.merchant == "Swiggy"
    assert extracted.category == Category.FOOD
    assert extracted.date == "01-01-2025"
|
|
|