blood-test-explainer / tests /test_document_processing.py
r0mant1c's picture
Add agent trace panel, vision extraction, and local Transformers backend.
354b37e
Raw
History Blame Contribute Delete
3.13 kB
import sys
import tempfile
from pathlib import Path
import fitz
from PIL import Image
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from src.document_processing import document_intake_metadata, document_to_payload_parts, validate_upload
def test_png_upload_returns_image_url_part():
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
path = tmp.name
Image.new("RGB", (32, 32), color="white").save(path)
parts = document_to_payload_parts(path)
assert len(parts) == 1
assert parts[0]["type"] == "image_url"
assert parts[0]["image_url"]["url"].startswith("data:image/jpeg;base64,")
def test_jpeg_upload_returns_image_url_part():
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
path = tmp.name
Image.new("RGB", (24, 24), color="red").save(path, format="JPEG")
parts = document_to_payload_parts(path)
assert len(parts) == 1
assert parts[0]["type"] == "image_url"
def test_pdf_upload_renders_pages_to_images():
with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
path = tmp.name
document = fitz.open()
page = document.new_page()
page.insert_text((72, 72), "Hemoglobin 12.5 g/dL")
document.save(path)
document.close()
parts = document_to_payload_parts(path, max_pages=1)
assert len(parts) == 1
assert parts[0]["type"] == "image_url"
assert parts[0]["image_url"]["url"].startswith("data:image/png;base64,")
def test_text_upload_still_returns_text_part():
with tempfile.NamedTemporaryFile(suffix=".txt", delete=False, mode="w", encoding="utf-8") as tmp:
tmp.write("Hemoglobin 13.1 g/dL")
path = tmp.name
parts = document_to_payload_parts(path)
assert len(parts) == 1
assert parts[0]["type"] == "text"
assert "Hemoglobin" in parts[0]["text"]
def test_validate_upload_rejects_unknown_extension():
with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp:
path = tmp.name
try:
validate_upload(path)
raise AssertionError("expected ValueError")
except ValueError as error:
assert "Unsupported file type" in str(error)
def test_document_intake_metadata_for_pdf():
with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
path = tmp.name
document = fitz.open()
page = document.new_page()
page.insert_text((72, 72), "Sample")
document.save(path)
document.close()
parts = document_to_payload_parts(path, max_pages=1)
metadata = document_intake_metadata(path, parts)
assert metadata["input_modality"] == "vision"
assert metadata["pages_rendered"] == 1
assert metadata["image_count"] == 1
if __name__ == "__main__":
test_png_upload_returns_image_url_part()
test_jpeg_upload_returns_image_url_part()
test_pdf_upload_renders_pages_to_images()
test_text_upload_still_returns_text_part()
test_validate_upload_rejects_unknown_extension()
test_document_intake_metadata_for_pdf()
print("test_document_processing: ok")