Spaces:
Running
Running
| from pathlib import Path | |
| PATCH_TEXT = Path("fix_streamlit_pdf_text_reinsert.py").read_text(encoding="utf-8") | |
| DOCKERFILE_TEXT = Path("Dockerfile").read_text(encoding="utf-8") | |
| def test_pdf_text_reinsert_helper_is_imported_and_used(): | |
| assert "from scrub_key_pdf_text_reinsert import reinsert_pdf_text_bytes" in PATCH_TEXT | |
| assert "reinsert_pdf_text_bytes(" in PATCH_TEXT | |
| assert "pdf_text_reinsert_file.getvalue()" in PATCH_TEXT | |
| def test_pdf_text_reinsert_ui_labels_are_present(): | |
| for marker in [ | |
| "PDF-tekst terugzetten naar TXT", | |
| "PDF-bestand terugzetten naar TXT", | |
| "Upload een PDF-bestand met placeholders", | |
| "Zet PDF-tekst lokaal terug", | |
| "Herstelde TXT-tekst uit PDF", | |
| "Download herstelde TXT uit PDF (.txt)", | |
| "Controleverslag PDF-tekst terugzetten", | |
| ]: | |
| assert marker in PATCH_TEXT | |
| def test_pdf_text_reinsert_required_warnings_are_present(): | |
| for marker in [ | |
| "PDF-tekstextractie is niet altijd volledig", | |
| "Opmaak, tabellen, kolommen, headers, footers en visuele volgorde kunnen verloren gaan", | |
| "Deze functie maakt geen herstelde PDF", | |
| "De uitvoer is alleen herstelde TXT-tekst", | |
| "Scans of afbeelding-PDF’s worden niet ondersteund omdat OCR niet beschikbaar is", | |
| "terugzetten herstelt originele gevoelige waarden", | |
| "geen AI, geen cloudverwerking en geen OCR", | |
| ]: | |
| assert marker in PATCH_TEXT | |
| def test_pdf_text_reinsert_audit_fields_are_present(): | |
| for marker in [ | |
| "document_type", | |
| "extracted_text_length", | |
| "replacement_count", | |
| "item_count", | |
| "active_item_count", | |
| "excluded_item_count", | |
| "placeholders_not_found", | |
| "unknown_placeholders", | |
| "duplicate_placeholders", | |
| "validation_issues", | |
| "unsupported_reason", | |
| "local_only", | |
| "ai_processing", | |
| "cloud_processing", | |
| "ocr_used", | |
| "pdf_output", | |
| "Documenttype", | |
| "Lengte geëxtraheerde tekst", | |
| "Niet-ondersteund reden", | |
| "Lokaal uitgevoerd", | |
| "AI-verwerking", | |
| "Cloudverwerking", | |
| "OCR gebruikt", | |
| "PDF-output", | |
| ]: | |
| assert marker in PATCH_TEXT | |
| def test_pdf_text_reinsert_shows_local_no_ai_no_cloud_no_ocr_no_pdf_output(): | |
| for marker in [ | |
| "Lokaal uitgevoerd: Ja", | |
| "AI-verwerking: Nee", | |
| "Cloudverwerking: Nee", | |
| "OCR gebruikt: Nee", | |
| "PDF-output: Nee", | |
| ]: | |
| assert marker in PATCH_TEXT | |
| def test_pdf_text_reinsert_accepts_pdf_only_and_requires_key(): | |
| assert 'type=["pdf"]' in PATCH_TEXT | |
| assert "Laad eerst een geldige Scrub Key" in PATCH_TEXT | |
| assert "Upload eerst een PDF-bestand met placeholders" in PATCH_TEXT | |
| assert "active_pdf_text_reinsert_scrub_key" in PATCH_TEXT | |
| assert 'st.session_state.get("active_scrub_key", {})' in PATCH_TEXT | |
| def test_pdf_text_reinsert_unsupported_case_does_not_offer_successful_download(): | |
| assert "pdf_text_unsupported_reason" in PATCH_TEXT | |
| assert "pdf_text_can_download = not pdf_text_validation_issues and not pdf_text_unsupported_reason" in PATCH_TEXT | |
| assert "Geen bruikbare tekstlaag gevonden" in PATCH_TEXT | |
| assert "Scans of afbeelding-PDF’s worden niet ondersteund" in PATCH_TEXT | |
| assert "if pdf_text_can_download:" in PATCH_TEXT | |
| def test_pdf_text_reinsert_is_inserted_before_anonymization_else_branch(): | |
| insert_marker = 'pdf_insert_marker = \'\'\'' | |
| assert insert_marker in PATCH_TEXT | |
| assert "+ pdf_text_reinsert_ui_block" in PATCH_TEXT | |
| assert "'''else:\n'''" in PATCH_TEXT | |
| def test_dockerfile_runs_pdf_text_reinsert_patch_after_existing_patch(): | |
| assert "python fix_streamlit_nested_expanders.py && python fix_streamlit_pdf_text_reinsert.py" in DOCKERFILE_TEXT | |
| def test_dockerfile_installs_runtime_pdf_parser_for_approved_ui_path(): | |
| assert "pypdf" in DOCKERFILE_TEXT | |
| assert "poetry install --no-root" in DOCKERFILE_TEXT | |
| def test_no_restored_pdf_ocr_cloud_ai_or_rehydration_behavior_added(): | |
| lower_patch = PATCH_TEXT.lower() | |
| forbidden_markers = [ | |
| "download herstelde pdf", | |
| "download_pdf_reinserted", | |
| "pdf_to_docx", | |
| "pytesseract", | |
| "ocr_used = true", | |
| "requests.post", | |
| "httpx.post", | |
| "cloud processing call", | |
| "restore_original_document", | |
| "automatic pdf rehydration", | |
| "server-side key storage", | |
| "durable key vault", | |
| "openai", | |
| "anthropic", | |
| ] | |
| for marker in forbidden_markers: | |
| assert marker not in lower_patch | |
| assert "st.stop()" not in PATCH_TEXT | |
| assert "blocks_export = True" not in PATCH_TEXT | |
| assert "changes_export_semantics = True" not in PATCH_TEXT | |