statementsetu / validate.py
perceptron01's picture
Upload 16 files
10ec275 verified
Raw
History Blame Contribute Delete
4.9 kB
"""Step C: deterministic validation -- the trust-winning feature.
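Running-balance reconciliation: for each row that has a printed balance,
taking the most recent earlier printed balance as balance[i-1],
    expected_balance[i] = balance[i-1] - debit[i] + credit[i]
(the debit/credit signs are swapped when detect_direction() finds that the
statement uses the inverted convention).
If |expected - printed| > TOLERANCE, both rows are flagged for review.
Also validates: every row has a date and dates are non-decreasing; each row
has exactly one of debit/credit.
NOTE(review): a check that a printed totals row matches the sum was listed
here, but no such check is visible in this module -- confirm where it lives.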
This converts "the AI might hallucinate" into "the math checks itself".
"""
# Largest absolute gap between the computed and the printed balance that still
# counts as reconciled; anything larger is flagged as a balance mismatch.
# NOTE(review): presumably expressed in the statement's currency units -- confirm.
TOLERANCE = 0.01
def detect_direction(transactions, sample_size=3):
    """Detect the bank's balance sign convention from the first usable rows.

    Standard convention: balance[i] = balance[i-1] - debit + credit.
    Some statements print the opposite. Both hypotheses are scored on the
    first ``sample_size`` informative row pairs and the better one wins.

    A pair is informative when both rows carry a printed balance, they are
    directly consecutive (a row without a balance breaks the chain), and the
    later row has a non-zero net movement. Rows whose debit and credit cancel
    out (or are both missing) add the same error to both hypotheses, so they
    are skipped instead of using up the sample budget.

    Args:
        transactions: iterable of dict rows with optional "balance", "debit"
            and "credit" entries.
        sample_size: number of informative row pairs to score before stopping.

    Returns:
        +1 for the standard convention, -1 for the inverted one. Ties
        (including "no usable rows") resolve to +1.
    """
    standard_err = 0.0
    inverted_err = 0.0
    informative = 0
    prev = None
    for t in transactions:
        bal = t.get("balance")
        if bal is None:
            # A gap in printed balances: the next row cannot be compared
            # against a balance that is more than one row old.
            prev = None
            continue
        if prev is not None:
            delta = (t.get("debit") or 0) - (t.get("credit") or 0)
            if delta:
                standard_err += abs((prev - delta) - bal)
                inverted_err += abs((prev + delta) - bal)
                informative += 1
        prev = bal
        if informative >= sample_size:
            break
    return -1 if inverted_err < standard_err else 1
def reconcile(transactions):
    """Reconcile running balances and flag the rows that need review.

    Every transaction dict is mutated: it is guaranteed a "flags" entry, and
    mismatch / structural messages are appended to it.

    A row is "checkable" when it has a printed balance and some earlier row
    had one too. Its expected balance is the previous printed balance plus the
    net movement of every row since then, so rows that carry an amount but no
    printed balance are still accounted for instead of producing a false
    mismatch on the next checkable row.

    Args:
        transactions: list of dict rows with optional "balance", "debit",
            "credit" and "date" entries.

    Returns:
        dict with keys:
            reconciled   -- number of checkable rows within TOLERANCE
            total        -- number of checkable rows
            direction    -- +1 (standard) or -1 (inverted) sign convention
            banner       -- human-readable summary line
            flagged_rows -- number of rows with at least one flag
    """
    direction = detect_direction(transactions)
    reconciled = 0
    checkable = 0
    prev_balance = None
    # Signed net movement of rows seen since the last printed balance that
    # had no printed balance of their own.
    carried = 0

    for t in transactions:
        t.setdefault("flags", [])

    for i, t in enumerate(transactions):
        bal = t.get("balance")
        debit = t.get("debit") or 0
        credit = t.get("credit") or 0

        if bal is None:
            # Nothing to compare against on this row, but its amount still
            # moves the account balance; remember it for the next check.
            if prev_balance is not None:
                if direction == 1:
                    carried += credit - debit
                else:
                    carried += debit - credit
            continue

        if prev_balance is not None:
            checkable += 1
            if direction == 1:
                expected = prev_balance + carried - debit + credit
            else:
                expected = prev_balance + carried + debit - credit
            if abs(expected - bal) > TOLERANCE:
                msg = f"balance mismatch (expected {expected:.2f}, printed {bal:.2f})"
                _flag(transactions, i, "balance", msg)
                _flag(transactions, i - 1, "balance", "adjacent to balance mismatch")
            else:
                reconciled += 1

        prev_balance = bal
        carried = 0

    # Structural checks (debit XOR credit; date present).
    _structural_checks(transactions)

    banner = _banner(reconciled, checkable)
    all_flags = sum(1 for t in transactions if t.get("flags"))
    return {
        "reconciled": reconciled,
        "total": checkable,
        "direction": direction,
        "banner": banner,
        "flagged_rows": all_flags,
    }
def _flag(transactions, idx, kind, msg):
if 0 <= idx < len(transactions):
flags = transactions[idx].setdefault("flags", [])
if msg not in flags:
flags.append(msg)
def _structural_checks(transactions):
    """Flag rows with a malformed amount or date.

    Amount rule: exactly one of "debit" / "credit" must be present and
    non-zero. Date rule: "date" must be truthy and must not compare lower than
    the most recent truthy date seen on an earlier row.
    """
    last_date = None
    for pos, row in enumerate(transactions):
        has_debit = row.get("debit") not in (None, 0)
        has_credit = row.get("credit") not in (None, 0)
        if has_debit == has_credit:
            # Either both amounts are present or neither is.
            if has_debit:
                _flag(transactions, pos, "amount", "both debit and credit present")
            else:
                _flag(transactions, pos, "amount", "no debit or credit amount")

        current = row.get("date")
        if not current:
            _flag(transactions, pos, "date", "unparseable date")
            continue
        if last_date and current < last_date:
            _flag(transactions, pos, "date", f"date {current} precedes previous {last_date}")
        last_date = current
def _banner(reconciled, total):
if total == 0:
return "ℹ️ No printed balances available to reconcile."
if reconciled == total:
return f"✅ {reconciled}/{total} rows reconciled against printed balances"
return (f"⚠️ {reconciled}/{total} rows reconciled — "
f"{total - reconciled} need review (flagged below)")
def flags_text(txn):
    """Join a row's flags into one ⚠-prefixed string; empty when unflagged."""
    messages = txn.get("flags")
    if not messages:
        return ""
    return "⚠ " + "; ".join(messages)
def summary_stats(transactions):
    """Compute the figures shown in the summary chips.

    Returns a dict with the row count, the debit and credit totals (missing
    or falsy amounts count as 0) and the net (credit minus debit), each
    monetary figure rounded to 2 decimals.
    """
    debit_amounts = [txn.get("debit") or 0 for txn in transactions]
    credit_amounts = [txn.get("credit") or 0 for txn in transactions]
    debit_total = round(sum(debit_amounts), 2)
    credit_total = round(sum(credit_amounts), 2)
    net_movement = round(credit_total - debit_total, 2)
    return {
        "count": len(transactions),
        "total_debit": debit_total,
        "total_credit": credit_total,
        "net": net_movement,
    }