bitcheck-document / tests /test_field_extractor.py
AI Assistant
Update Bitcheck Document Service and test.html
37a1755
from app.services.field_extractor import FieldExtractor
def test_certificate_like_text_extracts_certificate_number_date_and_institution() -> None:
    """Certificate-style text given the "general" hint is detected as a certificate.

    The assertions pin the detected document type together with the
    certificate number, date, institution, and the signature/stamp flags.
    """
    # Adjacent literals spell out the sample exactly: a leading newline,
    # one newline-terminated entry per line of the document.
    certificate_text = (
        "\n"
        "Lagos Technical Institute\n"
        "Certificate of Completion\n"
        "This is to certify that Ada Lovelace completed the Data Verification Program.\n"
        "Certificate No: CERT-2026-001\n"
        "Date: 15 May 2026\n"
        "Signed by Registrar with official seal and stamp.\n"
    )

    extractor = FieldExtractor()
    outcome = extractor.extract(certificate_text, "general")

    assert outcome.checked is True
    assert outcome.document_type == "certificate"

    fields = outcome.extracted_fields
    assert fields["certificate_number"] == "CERT-2026-001"
    assert fields["date"] == "15 May 2026"
    assert fields["institution"] == "Lagos Technical Institute"
    assert fields["signature_present"] is True
    assert fields["stamp_present"] is True
def test_invoice_like_text_extracts_amount_and_invoice_number() -> None:
    """Invoice-style text given the "general" hint is detected as an invoice.

    The assertions pin the detected document type and the extracted invoice
    number, total amount, and currency.
    """
    # Adjacent literals spell out the sample exactly: a leading newline,
    # one newline-terminated entry per line of the document.
    invoice_text = (
        "\n"
        "Invoice\n"
        "Vendor: BitCheck Labs\n"
        "Bill To: Example Customer Ltd\n"
        "Invoice Number: INV-2026-044\n"
        "Invoice Date: 2026-05-15\n"
        "Due Date: 2026-05-30\n"
        "VAT: NGN 7,500.00\n"
        "Total Amount: NGN 107,500.00\n"
        "Account Number: 0123456789\n"
        "Bank Name: Example Bank\n"
    )

    outcome = FieldExtractor().extract(invoice_text, "general")

    assert outcome.document_type == "invoice"

    # Checked in this order so the first failing field is reported first.
    expected_fields = {
        "invoice_number": "INV-2026-044",
        "total_amount": "NGN 107,500.00",
        "currency": "NGN",
    }
    for field_name, expected_value in expected_fields.items():
        assert outcome.extracted_fields[field_name] == expected_value
def test_missing_expected_fields_increases_risk() -> None:
    """A sparse certificate is flagged and scores worse than a complete one.

    Both inputs are extracted with the "certificate" hint. The sparse result
    must carry the "many_expected_fields_missing" flag, report missing fields,
    and have a higher risk score and lower confidence than the complete one.
    """
    sparse_text = "Certificate of Completion Certificate No: ABC-123"
    # Adjacent literals spell out the sample exactly: a leading newline,
    # one newline-terminated entry per line of the document.
    complete_text = (
        "\n"
        "BitCheck Academy\n"
        "Certificate of Completion\n"
        "This is to certify that Grace Hopper completed the Security Program.\n"
        "Certificate Number: CERT-12345\n"
        "Grade: Distinction\n"
        "Date: 01/05/2026\n"
        "Issued by BitCheck Academy Registrar\n"
        "Signature and stamp present.\n"
    )

    # Each extraction uses its own extractor instance, sparse input first.
    sparse_result = FieldExtractor().extract(sparse_text, "certificate")
    complete_result = FieldExtractor().extract(complete_text, "certificate")

    assert "many_expected_fields_missing" in sparse_result.field_flags
    assert sparse_result.missing_expected_fields
    assert sparse_result.field_risk_score > complete_result.field_risk_score
    assert sparse_result.field_confidence < complete_result.field_confidence
def test_academic_publication_type_extracts_publication_fields() -> None:
    """Article text extracted with the "academic_publication" hint keeps that type.

    The assertions require a risk score below 0.6, no
    "many_expected_fields_missing" flag, and the DOI taken from the text.
    """
    # Empty first and last entries give the sample its leading and trailing
    # newline once the lines are joined.
    article_lines = [
        "",
        "Citation: Raza, A.; Munir, K. A Novel Deep Learning Approach for Deepfake Image Detection.",
        "Article",
        "A Novel Deep Learning Approach for Deepfake Image Detection",
        "Ali Raza, Kashif Munir and Mubarak Almutairi",
        "Abstract: Deepfake is utilized in synthetic media.",
        "Published: 29 September 2022",
        "https://doi.org/10.3390/app12199820",
        "Publisher's Note: MDPI stays neutral with regard to jurisdictional claims.",
        "",
    ]
    article_text = "\n".join(article_lines)

    outcome = FieldExtractor().extract(article_text, "academic_publication")

    assert outcome.document_type == "academic_publication"
    assert outcome.field_risk_score < 0.6
    assert "many_expected_fields_missing" not in outcome.field_flags
    assert outcome.extracted_fields["doi"] == "10.3390/app12199820"