Spaces:

knzychw
/

document-extract-agent

Running

App Files Files Community

document-extract-agent / tests /test_validation.py

kennethzychew

phase 1.2: validation rules (hard/soft + arithmetic checks)

fd5e760 4 days ago

Raw

History Blame Contribute Delete

11.6 kB

	"""Unit tests for the hard/soft validation rules.

	Covers the acceptance criterion for build-plan task 1.2: reconciling totals
	pass H2/H3, mismatches fail, and soft failures are recorded without forcing
	review. Also exercises the monetary epsilon, the skip semantics for absent
	inputs, and the report's serialization.
	"""

	from __future__ import annotations

	from datetime import date

	import pytest

	from doc_agent.schema.models import Document
	from doc_agent.validation.rules import (
	MONETARY_ABS_EPSILON,
	ValidationReport,
	money_close,
	validate,
	)


	def _status(report: ValidationReport, code: str) -> str:
	    """Look up ``code`` in ``report`` and return that rule's status.

	    Asserts that ``report.by_code`` returned something other than ``None``,
	    so an absent rule fails the calling test with a readable message.
	    """
	    rule_result = report.by_code(code)
	    assert rule_result is not None, f"missing rule {code}"
	    return rule_result.status


	# --- money_close ---------------------------------------------------------------


	def test_money_close_within_absolute_epsilon() -> None:
	    """A gap of exactly ``MONETARY_ABS_EPSILON``, above or below, compares equal."""
	    base = 10.00
	    upper = base + MONETARY_ABS_EPSILON
	    lower = base - MONETARY_ABS_EPSILON
	    assert money_close(base, upper)
	    assert money_close(base, lower)


	def test_money_close_tolerates_cents_rejects_larger_gaps() -> None:
	    """A two-cent gap on 10.00 reconciles; a fifty-cent gap does not."""
	    amount = 10.00
	    two_cents_off = 10.02
	    half_unit_off = 10.50
	    assert money_close(amount, two_cents_off)
	    assert not money_close(amount, half_unit_off)


	def test_money_close_tiny_amounts_use_absolute_floor() -> None:
	    """For small amounts the absolute epsilon, not the relative term, decides."""
	    # Around 1.00 the relative term (0.005) is smaller than the absolute
	    # floor (0.02), so a 0.01 gap is tolerated while a 0.05 gap is not.
	    small = 1.00
	    assert money_close(small, 1.01)
	    assert not money_close(small, 1.05)


	def test_money_close_large_amounts_use_relative_tolerance() -> None:
	    """On large amounts the relative term sets a wider tolerance."""
	    # 0.5% of 10000 is 50: a gap of 40 is inside the tolerance, 100 is outside.
	    large = 10000.0
	    assert money_close(large, 10040.0)
	    assert not money_close(large, 10100.0)


	# --- H2: subtotal + tax == total ----------------------------------------------


	def test_h2_reconciling_totals_pass() -> None:
	    """H2 passes when subtotal + tax equals total (acceptance criterion)."""
	    fields = {"subtotal": "100.00", "tax": "7.00", "total": "107.00"}
	    outcome = validate(Document.model_validate(fields))
	    assert _status(outcome, "H2") == "pass"
	    assert not outcome.hard_failed


	def test_h2_mismatch_fails_and_forces_review() -> None:
	    """A total that disagrees with subtotal + tax fails H2 as a hard failure."""
	    fields = {"subtotal": "100.00", "tax": "7.00", "total": "120.00"}
	    outcome = validate(Document.model_validate(fields))
	    assert _status(outcome, "H2") == "fail"
	    assert outcome.hard_failed
	    hard_codes = [failure.code for failure in outcome.hard_failures]
	    assert "H2" in hard_codes


	def test_h2_within_epsilon_passes() -> None:
	    """A one-cent rounding gap (107.01 vs 107.00) still passes H2."""
	    fields = {"subtotal": "100.00", "tax": "7.00", "total": "107.01"}
	    outcome = validate(Document.model_validate(fields))
	    assert _status(outcome, "H2") == "pass"


	def test_h2_skipped_when_inputs_absent() -> None:
	    """With tax absent, H2 reports "skip" and the report is not hard-failed."""
	    fields_without_tax = {"subtotal": "100.00", "total": "107.00"}
	    outcome = validate(Document.model_validate(fields_without_tax))
	    assert _status(outcome, "H2") == "skip"
	    assert not outcome.hard_failed


	# --- H3: line items reconcile --------------------------------------------------


	def test_h3_line_items_reconcile_to_subtotal() -> None:
	    """H3 passes when line amounts add up to the subtotal (acceptance)."""
	    line_items = [
	        {"description": "A", "amount": "40.00"},
	        {"description": "B", "amount": "60.00"},
	    ]
	    fields = {
	        "line_items": line_items,
	        "subtotal": "100.00",
	        "tax": "7.00",
	        "total": "107.00",
	    }
	    outcome = validate(Document.model_validate(fields))
	    # Both arithmetic rules hold for this document, so nothing is hard-failed.
	    assert _status(outcome, "H3") == "pass"
	    assert _status(outcome, "H2") == "pass"
	    assert not outcome.hard_failed


	def test_h3_reconciles_to_total_when_no_subtotal() -> None:
	    """Without a subtotal, line amounts summing to the total pass H3."""
	    fields = {
	        "line_items": [{"amount": "10.00"}, {"amount": "15.00"}],
	        "total": "25.00",
	    }
	    outcome = validate(Document.model_validate(fields))
	    assert _status(outcome, "H3") == "pass"


	def test_h3_mismatch_fails() -> None:
	    """H3 fails, as a hard failure, when line amounts miss the subtotal."""
	    fields = {
	        "line_items": [{"amount": "40.00"}, {"amount": "60.00"}],
	        "subtotal": "150.00",
	    }
	    outcome = validate(Document.model_validate(fields))
	    assert _status(outcome, "H3") == "fail"
	    assert outcome.hard_failed


	def test_h3_skipped_without_line_items() -> None:
	    """A document with no line items gets an H3 status of "skip"."""
	    fields = {"subtotal": "100.00", "total": "100.00"}
	    outcome = validate(Document.model_validate(fields))
	    assert _status(outcome, "H3") == "skip"


	def test_h3_skipped_when_an_amount_missing() -> None:
	    """One line without an amount leaves the sum incomplete: H3 skips, not fails."""
	    line_items = [{"amount": "40.00"}, {"description": "no amount"}]
	    fields = {"line_items": line_items, "subtotal": "40.00"}
	    outcome = validate(Document.model_validate(fields))
	    assert _status(outcome, "H3") == "skip"


	# --- H1 / H4: critical-field guards -------------------------------------------


	def test_h1_passes_for_well_typed_document() -> None:
	    """H1 passes for a document built through normal model validation."""
	    fields = {"total": "10.00", "tax": "1.00", "invoice_number": "X1"}
	    outcome = validate(Document.model_validate(fields))
	    assert _status(outcome, "H1") == "pass"


	def test_h4_passes_when_total_present_and_nonnegative() -> None:
	    """H4 passes for a total of zero (present and not negative)."""
	    zero_total_doc = Document.model_validate({"total": "0.00"})
	    outcome = validate(zero_total_doc)
	    assert _status(outcome, "H4") == "pass"


	def test_h4_fails_when_total_missing() -> None:
	    """A document with no total fails H4 and is marked hard-failed."""
	    doc_without_total = Document.model_validate({"vendor_name": "Acme"})
	    outcome = validate(doc_without_total)
	    assert _status(outcome, "H4") == "fail"
	    assert outcome.hard_failed


	def test_h4_fails_when_total_negative() -> None:
	    """A total below zero fails H4 and is marked hard-failed."""
	    negative_total_doc = Document.model_validate({"total": "-5.00"})
	    outcome = validate(negative_total_doc)
	    assert _status(outcome, "H4") == "fail"
	    assert outcome.hard_failed


	# --- Soft rules: recorded without forcing review ------------------------------


	def test_soft_failures_do_not_force_review() -> None:
	    """Soft failures are recorded without setting hard_failed (acceptance).

	    The document's totals reconcile, but it carries no vendor name, currency,
	    or date, so S1, S2, and S3 are expected among the soft failures. Despite
	    those, ``hard_failed`` must remain False so the decision path stays open.
	    """
	    fields = {"subtotal": "100.00", "tax": "7.00", "total": "107.00"}
	    outcome = validate(Document.model_validate(fields))

	    assert not outcome.hard_failed
	    soft_codes = {failure.code for failure in outcome.soft_failures}
	    expected_soft = {"S1", "S2", "S3"}
	    assert expected_soft <= soft_codes


	def test_s1_present_date_is_plausible_without_reference() -> None:
	    """When ``today`` is not supplied, a document that has a date passes S1."""
	    fields = {"document_date": "2024-01-15", "total": "1.00"}
	    outcome = validate(Document.model_validate(fields))
	    assert _status(outcome, "S1") == "pass"


	def test_s1_future_date_fails_against_reference() -> None:
	    """A document dated years after the supplied ``today`` fails S1, softly."""
	    fields = {"document_date": "2030-01-01", "total": "1.00"}
	    reference_day = date(2024, 1, 15)
	    outcome = validate(Document.model_validate(fields), today=reference_day)
	    assert _status(outcome, "S1") == "fail"
	    # S1 is a soft rule, so its failure must not hard-fail the report.
	    assert not outcome.hard_failed


	def test_s1_missing_date_fails_soft() -> None:
	    """A document without ``document_date`` gets an S1 status of "fail"."""
	    undated_doc = Document.model_validate({"total": "1.00"})
	    outcome = validate(undated_doc)
	    assert _status(outcome, "S1") == "fail"


	def test_s2_known_currency_passes_unknown_fails() -> None:
	    """S2 passes for currency "sgd" and fails for the unknown code "ZZZ"."""
	    known_doc = Document.model_validate({"currency": "sgd", "total": "1.00"})
	    unknown_doc = Document.model_validate({"currency": "ZZZ", "total": "1.00"})
	    known_outcome = validate(known_doc)
	    assert _status(known_outcome, "S2") == "pass"
	    unknown_outcome = validate(unknown_doc)
	    assert _status(unknown_outcome, "S2") == "fail"


	def test_s3_vendor_present_passes() -> None:
	    """S3 passes when the document carries a vendor name."""
	    fields = {"vendor_name": "Acme Corp", "total": "1.00"}
	    outcome = validate(Document.model_validate(fields))
	    assert _status(outcome, "S3") == "pass"


	def test_s4_per_line_arithmetic() -> None:
	    """S4 passes when quantity * unit_price matches amount and fails otherwise."""
	    consistent_line = {"quantity": "2", "unit_price": "5.00", "amount": "10.00"}
	    inconsistent_line = {"quantity": "2", "unit_price": "5.00", "amount": "11.00"}
	    consistent_doc = Document.model_validate(
	        {"line_items": [consistent_line], "total": "10.00"}
	    )
	    inconsistent_doc = Document.model_validate(
	        {"line_items": [inconsistent_line], "total": "11.00"}
	    )

	    assert _status(validate(consistent_doc), "S4") == "pass"

	    failing_outcome = validate(inconsistent_doc)
	    assert _status(failing_outcome, "S4") == "fail"
	    # S4 is a soft rule, so its failure must not hard-fail the report.
	    assert not failing_outcome.hard_failed


	def test_s4_skipped_without_full_line_fields() -> None:
	    """S4 skips when no line has quantity, unit_price, and amount together."""
	    partial_line = {"description": "X", "amount": "10.00"}
	    fields = {"line_items": [partial_line], "total": "10.00"}
	    outcome = validate(Document.model_validate(fields))
	    assert _status(outcome, "S4") == "skip"


	# --- Report shape --------------------------------------------------------------


	def test_report_has_one_result_per_rule() -> None:
	    """The report lists each rule code exactly once, in a fixed order."""
	    expected_codes = ["H1", "H2", "H3", "H4", "S1", "S2", "S3", "S4"]
	    outcome = validate(Document.model_validate({"total": "1.00"}))
	    actual_codes = [entry.code for entry in outcome.results]
	    assert actual_codes == expected_codes


	def test_report_to_dict_is_serializable() -> None:
	    """``to_dict`` yields a plain, JSON-safe dict suitable for Document.validation."""
	    import json

	    fields = {"subtotal": "100.00", "tax": "7.00", "total": "120.00"}
	    serialized = validate(Document.model_validate(fields)).to_dict()

	    # The totals mismatch makes H2 a hard failure, visible in the payload.
	    assert serialized["hard_failed"] is True
	    assert "H2" in serialized["hard_failures"]

	    entries = serialized["results"]
	    assert isinstance(entries, list)
	    assert len(entries) == 8
	    head = entries[0]
	    assert set(head) == {"code", "severity", "status", "message"}

	    # A JSON round trip must reproduce the payload (no non-serializable objects).
	    round_tripped = json.loads(json.dumps(serialized))
	    assert round_tripped == serialized


	def test_validate_does_not_mutate_document() -> None:
	    """``validate`` leaves the document's validation and decision fields unset."""
	    fields = {"subtotal": "100.00", "tax": "7.00", "total": "120.00"}
	    subject = Document.model_validate(fields)
	    validate(subject)  # return value deliberately ignored
	    assert subject.validation == {}
	    assert subject.decision is None


	@pytest.mark.parametrize(
	    ("subtotal", "tax", "total", "expect_pass"),
	    [
	        ("19.99", "1.60", "21.59", True),
	        ("19.99", "1.60", "21.60", True),  # 0.01 rounding, within epsilon
	        ("19.99", "1.60", "25.00", False),
	    ],
	)
	def test_h2_epsilon_boundary(subtotal: str, tax: str, total: str, expect_pass: bool) -> None:
	    """H2 tolerates cent-level rounding but rejects real mismatches.

	    All three inputs are supplied in every case, so H2 must reach a verdict:
	    "pass" when ``expect_pass`` is true and "fail" otherwise. Comparing against
	    the exact status keeps an unexpected "skip" from counting as a correct
	    rejection.
	    """
	    document = Document.model_validate({"subtotal": subtotal, "tax": tax, "total": total})
	    expected_status = "pass" if expect_pass else "fail"
	    assert _status(validate(document), "H2") == expected_status