"""Tests for ``FieldExtractor.extract`` across several document types."""

from app.services.field_extractor import FieldExtractor

# NOTE(review): the multi-line document fixtures below were reconstructed with
# one field per line; confirm the line breaks and indentation against the
# upstream copy of this file if the extractor is sensitive to them.


def test_certificate_like_text_extracts_certificate_number_date_and_institution() -> None:
    """Certificate-style text given the "general" hint is typed and parsed as a certificate."""
    text = """
    Lagos Technical Institute
    Certificate of Completion
    This is to certify that Ada Lovelace completed the Data Verification Program.
    Certificate No: CERT-2026-001
    Date: 15 May 2026
    Signed by Registrar with official seal and stamp.
    """

    result = FieldExtractor().extract(text, "general")

    assert result.checked is True
    assert result.document_type == "certificate"
    assert result.extracted_fields["certificate_number"] == "CERT-2026-001"
    assert result.extracted_fields["date"] == "15 May 2026"
    assert result.extracted_fields["institution"] == "Lagos Technical Institute"
    assert result.extracted_fields["signature_present"] is True
    assert result.extracted_fields["stamp_present"] is True


def test_invoice_like_text_extracts_amount_and_invoice_number() -> None:
    """Invoice-style text given the "general" hint yields invoice number, total and currency."""
    text = """
    Invoice
    Vendor: BitCheck Labs
    Bill To: Example Customer Ltd
    Invoice Number: INV-2026-044
    Invoice Date: 2026-05-15
    Due Date: 2026-05-30
    VAT: NGN 7,500.00
    Total Amount: NGN 107,500.00
    Account Number: 0123456789
    Bank Name: Example Bank
    """

    result = FieldExtractor().extract(text, "general")

    assert result.document_type == "invoice"
    assert result.extracted_fields["invoice_number"] == "INV-2026-044"
    assert result.extracted_fields["total_amount"] == "NGN 107,500.00"
    assert result.extracted_fields["currency"] == "NGN"


def test_missing_expected_fields_increases_risk() -> None:
    """A sparse certificate scores higher risk and lower confidence than a complete one."""
    sparse = FieldExtractor().extract(
        "Certificate of Completion Certificate No: ABC-123",
        "certificate",
    )
    complete = FieldExtractor().extract(
        """
        BitCheck Academy
        Certificate of Completion
        This is to certify that Grace Hopper completed the Security Program.
        Certificate Number: CERT-12345
        Grade: Distinction
        Date: 01/05/2026
        Issued by BitCheck Academy Registrar
        Signature and stamp present.
        """,
        "certificate",
    )

    assert "many_expected_fields_missing" in sparse.field_flags
    assert sparse.missing_expected_fields
    # The comparison is relative: only the ordering of the two scores is pinned.
    assert sparse.field_risk_score > complete.field_risk_score
    assert sparse.field_confidence < complete.field_confidence


def test_academic_publication_type_extracts_publication_fields() -> None:
    """Text passed as "academic_publication" keeps that type and exposes its DOI."""
    text = """
    Citation: Raza, A.; Munir, K. A Novel Deep Learning Approach for Deepfake Image Detection.
    Article
    A Novel Deep Learning Approach for Deepfake Image Detection
    Ali Raza, Kashif Munir and Mubarak Almutairi
    Abstract: Deepfake is utilized in synthetic media.
    Published: 29 September 2022
    https://doi.org/10.3390/app12199820
    Publisher's Note: MDPI stays neutral with regard to jurisdictional claims.
    """

    result = FieldExtractor().extract(text, "academic_publication")

    assert result.document_type == "academic_publication"
    assert result.field_risk_score < 0.6
    assert "many_expected_fields_missing" not in result.field_flags
    assert result.extracted_fields["doi"] == "10.3390/app12199820"