| from __future__ import annotations |
|
|
| import os |
|
|
| os.environ.setdefault("PYTHONUTF8", "1") |
|
|
| import numpy as np |
|
|
| from tools.redaction_review import export_review_page_ocr_visualisation_for_gradio |
|
|
|
|
def test_export_review_page_ocr_visualisation_writes_file(tmp_path):
    """Export an OCR visualisation for one page and check a file is written.

    The word boxes here use integer coordinates. The returned path must be
    non-None, resolve to an existing file inside ``tmp_path``, and contain a
    ``review_ocr_visualisations`` path component.
    """
    output_dir = str(tmp_path)

    # 120x160, 3-channel uint8 image with every value set to 255.
    blank_image = np.full((120, 160, 3), 255, dtype=np.uint8)
    review_page = {"image": blank_image, "boxes": []}

    hello_word = {
        "text": "Hello",
        "bounding_box": (10, 10, 60, 30),
        "conf": 95,
        "model": "Textract",
    }
    world_word = {
        "text": "world",
        "bounding_box": (70, 10, 120, 30),
        "conf": 85,
        "model": "Textract",
    }
    text_line = {
        "line": 1,
        "text": "Hello world",
        "words": [hello_word, world_word],
    }
    ocr_pages = [{"page": 1, "results": {"line_1": text_line}}]

    exported = export_review_page_ocr_visualisation_for_gradio(
        review_page,
        1,
        ocr_pages,
        None,
        "doc.pdf",
        output_dir,
    )
    assert exported is not None

    # Compare fully resolved paths so symlinks cannot mask an output that
    # was written outside the temporary folder.
    real_export = os.path.realpath(exported)
    real_root = os.path.realpath(output_dir)
    assert os.path.commonpath([real_root, real_export]) == real_root
    assert os.path.isfile(real_export)

    # Normalise Windows separators before looking for the sub-folder name.
    assert "review_ocr_visualisations" in exported.replace("\\", "/")
|
|
|
|
def test_export_review_page_ocr_visualisation_draws_text_for_normalized_boxes(tmp_path):
    """Export an OCR visualisation when word boxes are fractional (0-1) values.

    Besides checking that the output file exists inside ``tmp_path``, the
    exported image is opened and the right-hand half (inset by a small
    margin) must contain at least one channel value below 250, i.e. something
    other than near-white was rendered there.
    """
    output_dir = str(tmp_path)

    # 120x160, 3-channel uint8 image with every value set to 255.
    blank_image = np.full((120, 160, 3), 255, dtype=np.uint8)
    review_page = {"image": blank_image, "boxes": []}

    hello_word = {
        "text": "Hello",
        "bounding_box": (0.10, 0.10, 0.40, 0.25),
        "conf": 95,
        "model": "Textract",
    }
    world_word = {
        "text": "world",
        "bounding_box": (0.45, 0.10, 0.80, 0.25),
        "conf": 85,
        "model": "Textract",
    }
    text_line = {
        "line": 1,
        "text": "Hello world",
        "words": [hello_word, world_word],
    }
    ocr_pages = [{"page": 1, "results": {"line_1": text_line}}]

    exported = export_review_page_ocr_visualisation_for_gradio(
        review_page,
        1,
        ocr_pages,
        None,
        "doc.pdf",
        output_dir,
    )
    assert exported is not None

    # Compare fully resolved paths so symlinks cannot mask an output that
    # was written outside the temporary folder.
    real_export = os.path.realpath(exported)
    real_root = os.path.realpath(output_dir)
    assert os.path.commonpath([real_root, real_export]) == real_root
    assert os.path.isfile(real_export)

    from PIL import Image

    rendered = Image.open(real_export).convert("RGB")
    width, height = rendered.size

    # Inspect only the right half of the output, trimmed by a few pixels on
    # every side.
    # NOTE(review): presumably the right half is the panel where the OCR text
    # is drawn - confirm against the exporter's layout.
    inset = 5
    right_region = rendered.crop(
        (width // 2 + inset, inset, width - inset, height - inset)
    )
    pixels = np.asarray(right_region)
    assert (pixels < 250).any()
|
|