finance-entity-extractor / benchmark.py
Ranjit Behera
Clean up repo structure and add benchmark
6a76e07
#!/usr/bin/env python3
"""
FinEE Benchmark Script
======================
Run this to verify accuracy on your own data.
Usage:
    python benchmark.py                    # Run built-in tests
    python benchmark.py --file data.jsonl  # Test on your data
    python benchmark.py --torture          # Run edge case tests
    python benchmark.py --all              # Run built-in and edge case tests
Author: Ranjit Behera
"""
import json
import time
import argparse
from typing import Dict, List, Any
from dataclasses import dataclass
# The benchmark cannot do anything without the package under test, so a
# missing install prints the install hint and stops with exit status 1.
try:
    from finee import extract, FinEE
    from finee.schema import ExtractionConfig
except ImportError:
    print("Install finee first: pip install finee")
    # Raise SystemExit directly: the bare `exit` name is injected by the
    # `site` module for interactive sessions and is not guaranteed to exist
    # (e.g. under `python -S` or in frozen/embedded interpreters).
    raise SystemExit(1)
@dataclass
class BenchmarkResult:
    """Totals collected over one benchmark run.

    Attributes:
        total: Number of samples in the dataset.
        correct: Number of samples for which every expected field matched.
        field_accuracy: Per-field accuracy in percent, keyed by field name.
        avg_latency_ms: Mean time per extraction call, in milliseconds.
    """

    total: int = 0
    correct: int = 0
    # None is only a placeholder default; __post_init__ replaces it so that
    # instances never share one mutable dict.
    field_accuracy: Dict[str, float] = None
    avg_latency_ms: float = 0

    def __post_init__(self):
        """Give the instance its own empty dict when none was supplied."""
        supplied = self.field_accuracy
        self.field_accuracy = {} if supplied is None else supplied
# ============================================================================
# BUILT-IN BENCHMARK DATA
# ============================================================================
# Built-in samples. Each entry pairs a raw message ("text") with the field
# values the extractor is expected to produce ("expected"); only the fields
# listed under "expected" are checked for that sample.
BENCHMARK_DATA = [
    # --- HDFC Bank ---
    {
        "text": "HDFC Bank: Rs.2500.00 debited from A/c XX3545 on 28-12-2025 to VPA swiggy@ybl. UPI Ref: 534567891234",
        "expected": {
            "amount": 2500.0,
            "type": "debit",
            "account": "3545",
            "merchant": "Swiggy",
            "category": "food",
        },
    },
    {
        "text": "HDFC: INR 15000 credited to A/c 9876 on 15-01-2025. NEFT from RAHUL SHARMA. Ref: HDFC25011512345",
        "expected": {
            "amount": 15000.0,
            "type": "credit",
            "account": "9876",
        },
    },
    # --- ICICI Bank ---
    {
        "text": "ICICI: Rs.1,250.50 debited from Acct XX4321 on 10-01-25 to amazon@apl. Ref: 987654321012",
        "expected": {
            "amount": 1250.50,
            "type": "debit",
            "account": "4321",
            "merchant": "Amazon",
            "category": "shopping",
        },
    },
    # --- SBI ---
    {
        "text": "SBI: Rs.350 debited from a/c XX1234 on 10-01-25. UPI txn to zomato@paytm. Ref: 456789012345",
        "expected": {
            "amount": 350.0,
            "type": "debit",
            "account": "1234",
            "merchant": "Zomato",
            "category": "food",
        },
    },
    # --- Axis Bank ---
    {
        "text": "Axis Bank: INR 800.00 debited from A/c 5678 on 05-01-2025. Info: UPI-UBER. Bal: Rs.12,500",
        "expected": {
            "amount": 800.0,
            "type": "debit",
            "account": "5678",
            "merchant": "Uber",
            "category": "transport",
        },
    },
    # --- Kotak ---
    {
        "text": "Rs.2000 credited to Kotak A/c XX4321 on 20-01-2025 from rahul.sharma@okicici. Ref: 321654987012",
        "expected": {
            "amount": 2000.0,
            "type": "credit",
            "account": "4321",
        },
    },
    # --- Payment apps ---
    {
        "text": "PhonePe: Paid Rs.150 to swiggy@ybl from A/c XX1234. UPI Ref: 123456789012",
        "expected": {
            "amount": 150.0,
            "type": "debit",
            "merchant": "Swiggy",
            "category": "food",
        },
    },
    {
        "text": "GPay: Sent Rs.500 to uber@paytm from HDFC Bank XX9876. Txn ID: GPY987654321",
        "expected": {
            "amount": 500.0,
            "type": "debit",
            "merchant": "Uber",
            "category": "transport",
        },
    },
]
# ============================================================================
# TORTURE TEST DATA (Edge Cases)
# ============================================================================
# Edge-case samples. Same shape as the built-in benchmark entries, plus a
# "difficulty" label that is printed in place of the sample number.
TORTURE_TESTS = [
    # Missing spaces
    {
        "text": "Rs.500.00debited from HDFC A/c1234 on01-01-25",
        "expected": {"amount": 500.0, "type": "debit", "account": "1234"},
        "difficulty": "Missing spaces",
    },
    # Weird formatting
    {
        "text": "HDFC:Rs 2,500/-debited A/c XX3545 dt:28/12/25 VPA-swiggy@ybl Ref534567891234",
        "expected": {"amount": 2500.0, "type": "debit", "account": "3545"},
        "difficulty": "Non-standard formatting",
    },
    # Mixed case
    {
        "text": "Your A/C XXXX1234 is DEBITED for RS. 1500 on 15-JAN-25. VPA: SWIGGY@YBL",
        "expected": {"amount": 1500.0, "type": "debit", "account": "1234"},
        "difficulty": "Mixed case",
    },
    # Truncated SMS
    {
        "text": "Rs.2500 debited from A/c...3545 to swi...",
        "expected": {"amount": 2500.0, "type": "debit"},
        "difficulty": "Truncated message",
    },
    # Extra noise
    {
        "text": "ALERT! Dear Customer, Rs.500.00 has been debited from your account XX1234 on 01-01-2025. For disputes call 1800-XXX-XXXX. Ignore if done by you.",
        "expected": {"amount": 500.0, "type": "debit", "account": "1234"},
        "difficulty": "Extra noise/marketing",
    },
    # Multiple amounts
    {
        "text": "Rs.500 debited from A/c 1234. Bal: Rs.15,000. Min due: Rs.2000",
        "expected": {"amount": 500.0, "type": "debit", "account": "1234"},
        "difficulty": "Multiple amounts (balance, due)",
    },
    # Unicode symbols: the rupee sign (U+20B9) and bullet (U+2022) must be the
    # real characters, otherwise this case exercises decoding garbage instead
    # of the symbols banks actually send.
    {
        "text": "₹2,500 debited from A/c •••• 3545 on 28-12-25",
        "expected": {"amount": 2500.0, "type": "debit", "account": "3545"},
        "difficulty": "Unicode symbols (₹, •)",
    },
    # Lakhs notation
    {
        "text": "INR 1.5 Lakh credited to your A/c 9876 on 15-01-25",
        "expected": {"amount": 150000.0, "type": "credit", "account": "9876"},
        "difficulty": "Lakhs notation",
    },
]
def normalize(val):
    """Reduce a value to a canonical form for equality comparison.

    An object exposing a ``value`` attribute (treated as an Enum member) is
    first replaced by that value; the result is then mapped as follows:

    - ``None`` stays ``None``
    - ``int`` / ``float`` becomes ``float``
    - anything else becomes its ``str()`` form, lower-cased and stripped

    Returns:
        ``None``, a ``float``, or a normalized ``str``.
    """
    if hasattr(val, "value"):
        # Unwrap first and fall through to the common rules, rather than
        # calling .lower() on the value directly: that way a numeric or None
        # enum value does not raise, and string enum values get the same
        # lower+strip treatment as plain strings.
        val = val.value
    if val is None:
        return None
    if isinstance(val, (int, float)):
        return float(val)
    return str(val).lower().strip()
def compare(expected: Dict, result) -> Dict[str, bool]:
    """Check each expected field against the same-named attribute of *result*.

    Args:
        expected: Mapping of field name to the value that field should have.
        result: Object whose attributes hold the actual values; an attribute
            that does not exist is read as ``None``.

    Returns:
        Mapping of every field name in *expected* to ``True`` when the
        normalized expected and actual values are equal, else ``False``.
    """

    def _agrees(name, wanted):
        # Read the attribute before normalizing either side.
        got = getattr(result, name, None)
        return normalize(wanted) == normalize(got)

    return {name: _agrees(name, wanted) for name, wanted in expected.items()}
def run_benchmark(data: List[Dict], name: str = "Benchmark") -> BenchmarkResult:
    """Run ``extract`` on every sample, print a report, and return the totals.

    Args:
        data: Samples to evaluate. Each is a dict with a "text" entry passed
            to ``extract`` and an "expected" dict of field name -> expected
            value. An optional "difficulty" label is printed in place of the
            "Sample N" heading.
        name: Title shown in the report banners.

    Returns:
        A BenchmarkResult holding the sample count, the number of samples
        whose expected fields all matched, per-field accuracy in percent, and
        the mean ``extract`` latency in milliseconds. For an empty dataset
        the accuracy dict is empty and the latency is 0.0.

    Raises:
        KeyError: If a sample lacks the "text" or "expected" key.
    """
    result = BenchmarkResult()
    result.total = len(data)
    field_correct = {}
    field_total = {}
    latencies = []
    banner = "=" * 70

    print(f"\n{banner}")
    print(f"📊 {name} ({len(data)} samples)")
    print(f"{banner}\n")

    for i, sample in enumerate(data, start=1):
        text = sample["text"]
        expected = sample["expected"]
        difficulty = sample.get("difficulty", "")

        # perf_counter() is the clock meant for measuring short intervals;
        # time.time() follows the wall clock and can jump if it is adjusted.
        start = time.perf_counter()
        r = extract(text)
        latencies.append((time.perf_counter() - start) * 1000)

        matches = compare(expected, r)
        all_match = all(matches.values())
        if all_match:
            result.correct += 1
        status = "✅" if all_match else "❌"

        # Track field accuracy
        for field, matched in matches.items():
            field_total[field] = field_total.get(field, 0) + 1
            field_correct[field] = field_correct.get(field, 0) + int(matched)

        # Print result
        if difficulty:
            print(f"{status} [{difficulty}]")
        else:
            print(f"{status} Sample {i}")

        if not all_match:
            print(f" Input: {text[:60]}...")
            for field, matched in matches.items():
                if not matched:
                    actual = getattr(r, field, None)
                    print(f" {field}: expected={expected[field]}, got={actual}")
            print()

    # Calculate field accuracy
    result.field_accuracy = {
        field: field_correct[field] / count * 100
        for field, count in field_total.items()
    }
    # An empty dataset (e.g. a blank user file) must still produce a report
    # instead of dividing by zero.
    result.avg_latency_ms = sum(latencies) / len(latencies) if latencies else 0.0
    overall_pct = result.correct / result.total * 100 if result.total else 0.0

    # Print summary
    print(f"\n{banner}")
    print(f"📈 SUMMARY: {name}")
    print(banner)
    print(f"Overall Accuracy: {result.correct}/{result.total} ({overall_pct:.1f}%)")
    print(f"Average Latency: {result.avg_latency_ms:.2f}ms")
    print("\nField Accuracy:")
    for field, acc in sorted(result.field_accuracy.items()):
        status = "✅" if acc >= 90 else "⚠️" if acc >= 70 else "❌"
        print(f" {field:12} {acc:5.1f}% {status}")
    print(f"{banner}\n")
    return result
def run_user_file(filepath: str) -> BenchmarkResult:
    """Load samples from a JSONL file and run the benchmark on them.

    Every non-blank line is parsed as one JSON document and passed on as a
    sample; blank lines are skipped.

    Args:
        filepath: Path to the JSONL file.

    Returns:
        The BenchmarkResult produced by ``run_benchmark`` for the loaded
        samples.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8: without it open() uses the platform locale
    # encoding, which mangles or rejects characters such as the rupee sign on
    # systems whose default is not UTF-8.
    with open(filepath, encoding="utf-8") as f:
        data = [json.loads(line) for line in f if line.strip()]
    return run_benchmark(data, f"User Data ({filepath})")
def main():
    """Parse the command line, run the selected benchmark(s), and print usage hints.

    Only one mode runs per invocation. When several options are given the
    precedence is ``--file``, then ``--torture``, then ``--all``; with no
    option the standard benchmark runs.
    """
    parser = argparse.ArgumentParser(description="FinEE Benchmark")
    parser.add_argument("--file", "-f", help="Path to JSONL file with test data")
    parser.add_argument("--torture", "-t", action="store_true", help="Run torture tests (edge cases)")
    parser.add_argument("--all", "-a", action="store_true", help="Run all benchmarks")
    args = parser.parse_args()

    print("\n" + "=" * 70)
    print("🏦 FinEE BENCHMARK SUITE")
    print("=" * 70)
    print("Testing extraction accuracy on Indian banking messages...")

    if args.file:
        run_user_file(args.file)
    elif args.torture:
        run_benchmark(TORTURE_TESTS, "Torture Tests (Edge Cases)")
    elif args.all:
        run_benchmark(BENCHMARK_DATA, "Standard Benchmark")
        run_benchmark(TORTURE_TESTS, "Torture Tests (Edge Cases)")
    else:
        run_benchmark(BENCHMARK_DATA, "Standard Benchmark")

    print("\n✅ Benchmark complete!")
    print("To test on your own data:")
    print(' python benchmark.py --file your_data.jsonl')
    print("\nJSONL format:")
    print(' {"text": "Rs.500 debited...", "expected": {"amount": 500, "type": "debit"}}')


if __name__ == "__main__":
    main()