scrub / tests /test_pdf_text_reinsert_ui_patch.py
solidprivacy-nl
Fix PDF text UI test anchor expectation
cb2a980
Raw
History Blame Contribute Delete
4.76 kB
from pathlib import Path
def _read_utf8(relative_path: str) -> str:
    """Return the UTF-8 decoded contents of *relative_path*.

    The path is passed to ``Path`` unchanged, so a relative path is resolved
    against the current working directory of the test process.
    """
    return Path(relative_path).read_text(encoding="utf-8")


# Both texts are read once at import time; every test below only performs
# substring checks against these in-memory strings.
PATCH_TEXT = _read_utf8("fix_streamlit_pdf_text_reinsert.py")
DOCKERFILE_TEXT = _read_utf8("Dockerfile")
def test_pdf_text_reinsert_helper_is_imported_and_used():
    """Assert the patch text contains the helper import, a call to it, and the upload ``getvalue()`` expression."""
    required_snippets = (
        "from scrub_key_pdf_text_reinsert import reinsert_pdf_text_bytes",
        "reinsert_pdf_text_bytes(",
        "pdf_text_reinsert_file.getvalue()",
    )
    # Gather the absent snippets first; an empty list means all were found.
    absent_snippets = [
        snippet for snippet in required_snippets if snippet not in PATCH_TEXT
    ]
    assert not absent_snippets
def test_pdf_text_reinsert_ui_labels_are_present():
    """Assert every expected UI label string occurs somewhere in the patch text."""
    expected_labels = (
        "PDF-tekst terugzetten naar TXT",
        "PDF-bestand terugzetten naar TXT",
        "Upload een PDF-bestand met placeholders",
        "Zet PDF-tekst lokaal terug",
        "Herstelde TXT-tekst uit PDF",
        "Download herstelde TXT uit PDF (.txt)",
        "Controleverslag PDF-tekst terugzetten",
    )
    # Plain substring search: position and surrounding code are not checked.
    missing_labels = [
        label for label in expected_labels if label not in PATCH_TEXT
    ]
    assert not missing_labels
def test_pdf_text_reinsert_required_warnings_are_present():
    """Assert each required warning sentence fragment appears verbatim in the patch text."""
    required_warnings = (
        "PDF-tekstextractie is niet altijd volledig",
        "Opmaak, tabellen, kolommen, headers, footers en visuele volgorde kunnen verloren gaan",
        "Deze functie maakt geen herstelde PDF",
        "De uitvoer is alleen herstelde TXT-tekst",
        # NOTE: this literal uses a typographic apostrophe (U+2019), not an ASCII quote.
        "Scans of afbeelding-PDF’s worden niet ondersteund omdat OCR niet beschikbaar is",
        "terugzetten herstelt originele gevoelige waarden",
        "geen AI, geen cloudverwerking en geen OCR",
    )
    warnings_not_found = [
        warning for warning in required_warnings if warning not in PATCH_TEXT
    ]
    assert not warnings_not_found
def test_pdf_text_reinsert_audit_fields_are_present():
    """Assert the patch text mentions every expected audit identifier and label string."""
    # snake_case identifiers expected somewhere in the patch text.
    identifier_markers = (
        "document_type",
        "extracted_text_length",
        "replacement_count",
        "item_count",
        "active_item_count",
        "excluded_item_count",
        "placeholders_not_found",
        "unknown_placeholders",
        "duplicate_placeholders",
        "validation_issues",
        "unsupported_reason",
        "local_only",
        "ai_processing",
        "cloud_processing",
        "ocr_used",
        "pdf_output",
    )
    # Capitalised label strings expected somewhere in the patch text.
    label_markers = (
        "Documenttype",
        "Lengte geëxtraheerde tekst",
        "Niet-ondersteund reden",
        "Lokaal uitgevoerd",
        "AI-verwerking",
        "Cloudverwerking",
        "OCR gebruikt",
        "PDF-output",
    )
    # The two groups are checked in the same order as one combined sequence.
    missing_markers = [
        marker
        for marker in identifier_markers + label_markers
        if marker not in PATCH_TEXT
    ]
    assert not missing_markers
def test_pdf_text_reinsert_shows_local_no_ai_no_cloud_no_ocr_no_pdf_output():
    """Assert the five fixed "label: Ja/Nee" status strings all occur in the patch text."""
    status_lines = (
        "Lokaal uitgevoerd: Ja",
        "AI-verwerking: Nee",
        "Cloudverwerking: Nee",
        "OCR gebruikt: Nee",
        "PDF-output: Nee",
    )
    absent_status_lines = [
        line for line in status_lines if line not in PATCH_TEXT
    ]
    assert not absent_status_lines
def test_pdf_text_reinsert_accepts_pdf_only_and_requires_key():
    """Assert the patch text contains the pdf-only type argument, both prompt strings and the Scrub Key lookups."""
    expected_fragments = (
        'type=["pdf"]',
        "Laad eerst een geldige Scrub Key",
        "Upload eerst een PDF-bestand met placeholders",
        "active_pdf_text_reinsert_scrub_key",
        'st.session_state.get("active_scrub_key", {})',
    )
    # Only textual presence is verified; the patch itself is never executed here.
    fragments_not_found = [
        fragment for fragment in expected_fragments if fragment not in PATCH_TEXT
    ]
    assert not fragments_not_found
def test_pdf_text_reinsert_unsupported_case_does_not_offer_successful_download():
    """Assert the patch text contains the download-gating expression, its ``if`` guard and the unsupported-PDF messages."""
    gating_fragments = (
        "pdf_text_unsupported_reason",
        "pdf_text_can_download = not pdf_text_validation_issues and not pdf_text_unsupported_reason",
        "Geen bruikbare tekstlaag gevonden",
        # NOTE: typographic apostrophe (U+2019) in this literal is intentional.
        "Scans of afbeelding-PDF’s worden niet ondersteund",
        "if pdf_text_can_download:",
    )
    missing_fragments = [
        fragment for fragment in gating_fragments if fragment not in PATCH_TEXT
    ]
    assert not missing_fragments
def test_pdf_text_reinsert_is_inserted_before_anonymization_else_branch():
    """Assert the patch text contains the insert-marker assignment, the UI-block concatenation and the ``else:`` anchor.

    Only the presence of the three fragments is checked; their relative order
    inside the patch text is not.
    """
    anchor_fragments = (
        # Start of a triple-single-quoted assignment to ``pdf_insert_marker``.
        "pdf_insert_marker = '''",
        "+ pdf_text_reinsert_ui_block",
        # Triple-quoted literal holding "else:" followed by a real newline.
        "'''else:\n'''",
    )
    absent_fragments = [
        fragment for fragment in anchor_fragments if fragment not in PATCH_TEXT
    ]
    assert not absent_fragments
def test_dockerfile_runs_pdf_text_reinsert_patch_after_existing_patch():
    """Assert the Dockerfile text chains the nested-expanders patch and the PDF text reinsert patch with ``&&``."""
    chained_patch_command = "python fix_streamlit_nested_expanders.py && python fix_streamlit_pdf_text_reinsert.py"
    found = chained_patch_command in DOCKERFILE_TEXT
    assert found
def test_dockerfile_installs_runtime_pdf_parser_for_approved_ui_path():
    """Assert the Dockerfile text mentions ``pypdf`` and the ``poetry install --no-root`` command."""
    dockerfile_fragments = ("pypdf", "poetry install --no-root")
    missing_fragments = [
        fragment for fragment in dockerfile_fragments if fragment not in DOCKERFILE_TEXT
    ]
    assert not missing_fragments
def test_no_restored_pdf_ocr_cloud_ai_or_rehydration_behavior_added():
    """Assert none of the forbidden fragments occur in the patch text.

    The first group is matched case-insensitively against a lower-cased copy
    of the patch text, so its entries are written in lower case. The second
    group is matched against the patch text exactly as read.
    """
    lowered_patch_text = PATCH_TEXT.lower()
    banned_case_insensitive = (
        "download herstelde pdf",
        "download_pdf_reinserted",
        "pdf_to_docx",
        "pytesseract",
        "ocr_used = true",
        "requests.post",
        "httpx.post",
        "cloud processing call",
        "restore_original_document",
        "automatic pdf rehydration",
        "server-side key storage",
        "durable key vault",
        "openai",
        "anthropic",
    )
    leaked_markers = [
        marker for marker in banned_case_insensitive if marker in lowered_patch_text
    ]
    assert not leaked_markers

    # These three are case-sensitive checks on the original patch text.
    banned_exact = (
        "st.stop()",
        "blocks_export = True",
        "changes_export_semantics = True",
    )
    leaked_exact = [marker for marker in banned_exact if marker in PATCH_TEXT]
    assert not leaked_exact