|
|
from fastapi import FastAPI, HTTPException, Query |
|
|
from pydantic import BaseModel |
|
|
from typing import List, Optional, Dict, Any |
|
|
from datetime import datetime, date |
|
|
import re |
|
|
from difflib import SequenceMatcher |
|
|
import uvicorn |
|
|
|
|
|
app = FastAPI( |
|
|
title="Transaction Reconciliation API", |
|
|
description="Reconcile bank and credit card transactions using fuzzy matching", |
|
|
version="1.0.0" |
|
|
) |
|
|
|
|
|
|
|
|
class Transaction(BaseModel): |
|
|
id: str |
|
|
date: str |
|
|
amount: float |
|
|
description: str |
|
|
type: str |
|
|
reference_number: Optional[str] = None |
|
|
|
|
|
class ReconciliationInput(BaseModel): |
|
|
bank_transactions: List[Transaction] |
|
|
credit_card_transactions: List[Transaction] |
|
|
|
|
|
class MatchedTransaction(BaseModel): |
|
|
bank_id: str |
|
|
credit_card_id: str |
|
|
match_score: float |
|
|
match_reason: str |
|
|
description: str |
|
|
amount: float |
|
|
|
|
|
class UnmatchedTransaction(BaseModel): |
|
|
id: str |
|
|
date: str |
|
|
amount: float |
|
|
description: str |
|
|
type: str |
|
|
reference_number: Optional[str] = None |
|
|
|
|
|
class ReconciliationOutput(BaseModel): |
|
|
matched_transactions: List[MatchedTransaction] |
|
|
unmatched_bank_transactions: List[UnmatchedTransaction] |
|
|
unmatched_credit_card_transactions: List[UnmatchedTransaction] |
|
|
|
|
|
class ReconciliationService: |
|
|
def __init__(self, |
|
|
description_threshold: float = 0.7, |
|
|
amount_tolerance: float = 0.01, |
|
|
max_date_diff_days: int = 7): |
|
|
self.description_threshold = description_threshold |
|
|
self.amount_tolerance = amount_tolerance |
|
|
self.max_date_diff_days = max_date_diff_days |
|
|
|
|
|
def fuzzy_match_description(self, desc1: str, desc2: str) -> float: |
|
|
"""Calculate fuzzy match score between two descriptions""" |
|
|
|
|
|
clean_desc1 = self._clean_description(desc1.lower()) |
|
|
clean_desc2 = self._clean_description(desc2.lower()) |
|
|
|
|
|
|
|
|
similarity = SequenceMatcher(None, clean_desc1, clean_desc2).ratio() |
|
|
|
|
|
|
|
|
if self._check_common_patterns(clean_desc1, clean_desc2): |
|
|
similarity = max(similarity, 0.8) |
|
|
|
|
|
return similarity |
|
|
|
|
|
def _clean_description(self, description: str) -> str: |
|
|
"""Clean description for better matching""" |
|
|
|
|
|
cleaned = re.sub(r'[^\w\s]', ' ', description) |
|
|
cleaned = re.sub(r'\s+', ' ', cleaned).strip() |
|
|
return cleaned |
|
|
|
|
|
def _check_common_patterns(self, desc1: str, desc2: str) -> bool: |
|
|
"""Check for common transaction patterns""" |
|
|
patterns = [ |
|
|
(r'uber', r'uber'), |
|
|
(r'amazon|amzn', r'amazon|amzn'), |
|
|
(r'invoice\s*#?\s*(\d+)', r'invoice\s*#?\s*(\d+)'), |
|
|
(r'payment.*invoice', r'payment.*invoice'), |
|
|
(r'trip\s*id\s*(\d+)', r'trip\s*id\s*(\d+)') |
|
|
] |
|
|
|
|
|
for pattern1, pattern2 in patterns: |
|
|
if re.search(pattern1, desc1) and re.search(pattern2, desc2): |
|
|
return True |
|
|
return False |
|
|
|
|
|
def calculate_date_difference(self, date1: str, date2: str) -> int: |
|
|
"""Calculate difference in days between two dates""" |
|
|
try: |
|
|
d1 = datetime.strptime(date1, "%Y-%m-%d").date() |
|
|
d2 = datetime.strptime(date2, "%Y-%m-%d").date() |
|
|
return abs((d1 - d2).days) |
|
|
except ValueError: |
|
|
return float('inf') |
|
|
|
|
|
def amounts_match(self, amount1: float, amount2: float) -> bool: |
|
|
"""Check if amounts are close enough to match""" |
|
|
return abs(abs(amount1) - abs(amount2)) <= self.amount_tolerance |
|
|
|
|
|
def types_match(self, bank_type: str, cc_type: str) -> bool: |
|
|
"""Check if transaction types match according to business logic""" |
|
|
type_mappings = { |
|
|
('debit', 'payment'), |
|
|
('credit', 'receipt'), |
|
|
('withdrawal', 'payment'), |
|
|
('deposit', 'receipt') |
|
|
} |
|
|
|
|
|
return (bank_type.lower(), cc_type.lower()) in type_mappings or bank_type.lower() == cc_type.lower() |
|
|
|
|
|
def calculate_match_score(self, bank_txn: Transaction, cc_txn: Transaction) -> tuple[float, str]: |
|
|
"""Calculate overall match score and reason""" |
|
|
scores = [] |
|
|
reasons = [] |
|
|
|
|
|
|
|
|
if self.amounts_match(bank_txn.amount, cc_txn.amount): |
|
|
scores.append(0.4) |
|
|
reasons.append("amounts match") |
|
|
else: |
|
|
amount_diff = abs(abs(bank_txn.amount) - abs(cc_txn.amount)) |
|
|
amount_score = max(0, 0.4 * (1 - amount_diff / max(abs(bank_txn.amount), abs(cc_txn.amount)))) |
|
|
scores.append(amount_score) |
|
|
if amount_score > 0.2: |
|
|
reasons.append("amounts close") |
|
|
|
|
|
|
|
|
if (bank_txn.reference_number and cc_txn.reference_number and |
|
|
bank_txn.reference_number == cc_txn.reference_number): |
|
|
scores.append(0.3) |
|
|
reasons.append("reference numbers match") |
|
|
else: |
|
|
scores.append(0) |
|
|
|
|
|
|
|
|
desc_score = self.fuzzy_match_description(bank_txn.description, cc_txn.description) |
|
|
scores.append(0.2 * desc_score) |
|
|
if desc_score >= self.description_threshold: |
|
|
reasons.append("descriptions match") |
|
|
|
|
|
|
|
|
date_diff = self.calculate_date_difference(bank_txn.date, cc_txn.date) |
|
|
if date_diff <= self.max_date_diff_days: |
|
|
date_score = 0.1 * (1 - date_diff / self.max_date_diff_days) |
|
|
scores.append(date_score) |
|
|
if date_diff <= 1: |
|
|
reasons.append("dates match") |
|
|
else: |
|
|
reasons.append("dates close") |
|
|
else: |
|
|
scores.append(0) |
|
|
|
|
|
total_score = sum(scores) |
|
|
reason = ", ".join(reasons) if reasons else "partial match" |
|
|
|
|
|
return total_score, reason |
|
|
|
|
|
def reconcile(self, input_data: ReconciliationInput) -> ReconciliationOutput: |
|
|
"""Main reconciliation logic""" |
|
|
matched_transactions = [] |
|
|
unmatched_bank = list(input_data.bank_transactions) |
|
|
unmatched_cc = list(input_data.credit_card_transactions) |
|
|
|
|
|
|
|
|
for bank_txn in input_data.bank_transactions: |
|
|
best_match = None |
|
|
best_score = 0 |
|
|
best_reason = "" |
|
|
|
|
|
for cc_txn in input_data.credit_card_transactions: |
|
|
|
|
|
if not self.types_match(bank_txn.type, cc_txn.type): |
|
|
continue |
|
|
|
|
|
score, reason = self.calculate_match_score(bank_txn, cc_txn) |
|
|
|
|
|
|
|
|
if score >= 0.6 and score > best_score: |
|
|
best_match = cc_txn |
|
|
best_score = score |
|
|
best_reason = reason |
|
|
|
|
|
if best_match: |
|
|
matched_transactions.append(MatchedTransaction( |
|
|
bank_id=bank_txn.id, |
|
|
credit_card_id=best_match.id, |
|
|
match_score=round(best_score, 2), |
|
|
match_reason=best_reason, |
|
|
description=bank_txn.description, |
|
|
amount=bank_txn.amount |
|
|
)) |
|
|
|
|
|
|
|
|
if bank_txn in unmatched_bank: |
|
|
unmatched_bank.remove(bank_txn) |
|
|
if best_match in unmatched_cc: |
|
|
unmatched_cc.remove(best_match) |
|
|
|
|
|
|
|
|
unmatched_bank_list = [ |
|
|
UnmatchedTransaction( |
|
|
id=txn.id, |
|
|
date=txn.date, |
|
|
amount=txn.amount, |
|
|
description=txn.description, |
|
|
type=txn.type, |
|
|
reference_number=txn.reference_number |
|
|
) for txn in unmatched_bank |
|
|
] |
|
|
|
|
|
unmatched_cc_list = [ |
|
|
UnmatchedTransaction( |
|
|
id=txn.id, |
|
|
date=txn.date, |
|
|
amount=txn.amount, |
|
|
description=txn.description, |
|
|
type=txn.type, |
|
|
reference_number=txn.reference_number |
|
|
) for txn in unmatched_cc |
|
|
] |
|
|
|
|
|
return ReconciliationOutput( |
|
|
matched_transactions=matched_transactions, |
|
|
unmatched_bank_transactions=unmatched_bank_list, |
|
|
unmatched_credit_card_transactions=unmatched_cc_list |
|
|
) |
|
|
|
|
|
|
|
|
reconciliation_service = ReconciliationService() |
|
|
|
|
|
@app.get("/") |
|
|
async def root(): |
|
|
"""Health check endpoint""" |
|
|
return { |
|
|
"message": "Transaction Reconciliation API is running", |
|
|
"status": "healthy", |
|
|
"version": "1.0.0" |
|
|
} |
|
|
|
|
|
@app.post("/reconcile", response_model=ReconciliationOutput) |
|
|
async def reconcile_transactions(input_data: ReconciliationInput): |
|
|
""" |
|
|
Reconcile bank and credit card transactions |
|
|
|
|
|
This endpoint matches transactions based on: |
|
|
- Amount similarity (within tolerance) |
|
|
- Date proximity (within 7 days) |
|
|
- Description fuzzy matching (70% threshold) |
|
|
- Transaction type compatibility |
|
|
- Reference number exact matching |
|
|
""" |
|
|
try: |
|
|
result = reconciliation_service.reconcile(input_data) |
|
|
return result |
|
|
except Exception as e: |
|
|
raise HTTPException(status_code=500, detail=f"Reconciliation failed: {str(e)}") |
|
|
|
|
|
@app.get("/health") |
|
|
async def health_check(): |
|
|
"""Health check for deployment""" |
|
|
return {"status": "ok", "service": "Transaction Reconciliation API"} |
|
|
|
|
|
@app.post("/reconcile/custom", response_model=ReconciliationOutput) |
|
|
async def reconcile_with_custom_params( |
|
|
input_data: ReconciliationInput, |
|
|
description_threshold: float = Query(0.7, ge=0, le=1, description="Fuzzy match threshold for descriptions"), |
|
|
amount_tolerance: float = Query(0.01, ge=0, description="Maximum allowed difference in amounts"), |
|
|
max_date_diff_days: int = Query(7, ge=0, description="Maximum allowed date difference in days") |
|
|
): |
|
|
""" |
|
|
Reconcile transactions with custom matching parameters |
|
|
""" |
|
|
try: |
|
|
custom_service = ReconciliationService( |
|
|
description_threshold=description_threshold, |
|
|
amount_tolerance=amount_tolerance, |
|
|
max_date_diff_days=max_date_diff_days |
|
|
) |
|
|
result = custom_service.reconcile(input_data) |
|
|
return result |
|
|
except Exception as e: |
|
|
raise HTTPException(status_code=500, detail=f"Reconciliation failed: {str(e)}") |
|
|
|
|
|
if __name__ == "__main__": |
|
|
uvicorn.run(app, host="0.0.0.0", port=8000) |