finance-entity-extractor / tests /test_extractor.py

Ranjit Behera

FinEE v1.0 - Finance Entity Extractor

dcc24f8 21 days ago

3.99 kB

	"""
	Integration Tests for FinEE Extractor (Pipeline).

	Tests the full 5-tier pipeline including:
	- Additive merge (Regex + Rules + LLM)
	- Graceful degradation (No backend)
	- Mocked LLM responses
	"""

	import pytest
	from unittest.mock import MagicMock, patch
	from finee.extractor import FinEE, ExtractionConfig
	from finee.schema import ExtractionResult, TransactionType, Category, ExtractionSource
	from finee.backends import BaseBackend

	# Mock Backend for testing Tier 3
	class MockBackend(BaseBackend):
	    """Stand-in LLM backend that answers with fixed, keyword-driven replies.

	    It always reports itself as available, "loads" instantly, and picks its
	    reply by looking for a field keyword inside the prompt.
	    """

	    def is_available(self):
	        """Report the backend as usable."""
	        return True

	    def load_model(self, path=None):
	        """Mark the backend as loaded without touching any model file."""
	        self._loaded = True
	        return True

	    def generate(self, prompt, **kwargs):
	        """Return the canned reply for the first keyword found in *prompt*.

	        Matching is case-insensitive and checked in the order merchant,
	        category, date; an empty string is returned when nothing matches.
	        """
	        lowered = prompt.lower()
	        # Order matters: the first keyword present in the prompt wins.
	        canned_replies = (
	            ("merchant", "Uber"),
	            ("category", "transport"),
	            ("date", "01-01-2025"),
	        )
	        return next(
	            (reply for keyword, reply in canned_replies if keyword in lowered),
	            "",
	        )

	@pytest.fixture
	def extractor_no_llm():
	    """Provide a FinEE extractor configured with ``use_llm=False``."""
	    return FinEE(ExtractionConfig(use_llm=False))

	@pytest.fixture
	def extractor_with_mock_llm():
	    """Provide a FinEE extractor with ``use_llm=True`` wired to MockBackend."""
	    pipeline = FinEE(ExtractionConfig(use_llm=True))
	    # Inject the mock directly and flag it as already loaded.
	    pipeline._backend = MockBackend()
	    pipeline._backend_loaded = True
	    return pipeline

	def test_tier1_regex_only(extractor_no_llm):
	    """Amount, type and account are extracted with the LLM switched off."""
	    extracted = extractor_no_llm.extract("Rs.500.00 debited from A/c 1234")

	    assert extracted.amount == 500.0
	    assert extracted.type == TransactionType.DEBIT
	    assert extracted.account == "1234"
	    # A successful extraction must report a non-zero confidence.
	    assert extracted.confidence_score > 0.0

	def test_tier2_rules_enrichment(extractor_no_llm):
	    """A VPA in the message yields merchant and category without the LLM."""
	    extracted = extractor_no_llm.extract("Rs.250 paid to swiggy@ybl")

	    assert extracted.amount == 250.0
	    assert extracted.vpa == "swiggy@ybl"
	    # The next two fields are expected to come from the Tier 2 rules.
	    assert extracted.merchant == "Swiggy"
	    assert extracted.category == Category.FOOD

	def test_tier3_additive_merge(extractor_with_mock_llm):
	    """
	    Check that results from different tiers are merged additively:
	    - the amount is expected from Tier 1 (regex)
	    - merchant and category are expected from Tier 3 (the mock LLM)
	    """
	    # The mock backend answers "Uber" / "transport" for merchant / category
	    # prompts.
	    extracted = extractor_with_mock_llm.extract("Rs.500 paid for taxi ride")

	    # Field values, each attributed to the tier expected to supply it.
	    assert extracted.amount == 500.0  # Tier 1 (Regex)
	    assert extracted.merchant == "Uber"  # Tier 3 (Mock LLM)
	    assert extracted.category == Category.TRANSPORT  # Tier 3 (Mock LLM)

	    # Per-field provenance recorded in the result metadata.
	    provenance = extracted.meta
	    assert provenance['amount'].source == ExtractionSource.REGEX
	    assert provenance['merchant'].source == ExtractionSource.LLM

	def test_graceful_degradation():
	    """Extraction still succeeds when the LLM is enabled but has no backend."""
	    pipeline = FinEE(ExtractionConfig(use_llm=True))

	    # Simulate "no backend installed": nothing to call, yet marked as loaded.
	    pipeline._backend = None
	    pipeline._backend_loaded = True

	    extracted = pipeline.extract("Rs.100 debited")

	    # Reaching these asserts at all shows extract() did not raise; the
	    # regex-level fields must still be populated.
	    assert extracted.amount == 100.0
	    assert extracted.type == TransactionType.DEBIT

	def test_full_pipeline_consistency():
	    """A fully specified message produces a complete result without the LLM."""
	    # LLM disabled so the run is purely deterministic.
	    pipeline = FinEE(ExtractionConfig(use_llm=False))

	    # Every field asserted below is expected to be recoverable by Regex + Rules.
	    message = "Rs.2500 debited from HDFC A/c 1234 to swiggy@ybl on 01-01-2025"
	    extracted = pipeline.extract(message)

	    assert extracted.is_complete()
	    assert extracted.amount == 2500.0
	    assert extracted.merchant == "Swiggy"
	    assert extracted.category == Category.FOOD
	    assert extracted.date == "01-01-2025"