Spaces:
Running on Zero
Running on Zero
| import sys | |
| import tempfile | |
| from pathlib import Path | |
| import fitz | |
| from PIL import Image | |
| sys.path.insert(0, str(Path(__file__).resolve().parents[1])) | |
| from src.document_processing import document_intake_metadata, document_to_payload_parts, validate_upload | |
def test_png_upload_returns_image_url_part():
    """A PNG upload yields exactly one ``image_url`` payload part.

    NOTE(review): the final assertion expects a JPEG data URL for a PNG
    input -- presumably the processor re-encodes images; confirm against
    ``document_to_payload_parts``.
    """
    # delete=False so the file can be reopened by name after the handle closes.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        path = tmp.name
    try:
        Image.new("RGB", (32, 32), color="white").save(path)
        parts = document_to_payload_parts(path)
        assert len(parts) == 1
        assert parts[0]["type"] == "image_url"
        assert parts[0]["image_url"]["url"].startswith("data:image/jpeg;base64,")
    finally:
        # The temp file was created with delete=False; remove it explicitly
        # so repeated runs do not accumulate files in the temp directory.
        Path(path).unlink(missing_ok=True)
def test_jpeg_upload_returns_image_url_part():
    """A JPEG upload yields exactly one ``image_url`` payload part."""
    # delete=False so the file can be reopened by name after the handle closes.
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
        path = tmp.name
    try:
        Image.new("RGB", (24, 24), color="red").save(path, format="JPEG")
        parts = document_to_payload_parts(path)
        assert len(parts) == 1
        assert parts[0]["type"] == "image_url"
    finally:
        # Explicit cleanup: delete=False means nothing else removes the file.
        Path(path).unlink(missing_ok=True)
def test_pdf_upload_renders_pages_to_images():
    """A one-page PDF with ``max_pages=1`` yields one PNG ``image_url`` part."""
    # delete=False so the PDF can be written and read by name afterwards.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        path = tmp.name
    try:
        document = fitz.open()
        try:
            page = document.new_page()
            page.insert_text((72, 72), "Hemoglobin 12.5 g/dL")
            document.save(path)
        finally:
            # Close the document even if building or saving it fails.
            document.close()

        parts = document_to_payload_parts(path, max_pages=1)
        assert len(parts) == 1
        assert parts[0]["type"] == "image_url"
        assert parts[0]["image_url"]["url"].startswith("data:image/png;base64,")
    finally:
        # Explicit cleanup: delete=False means nothing else removes the file.
        Path(path).unlink(missing_ok=True)
def test_text_upload_still_returns_text_part():
    """A ``.txt`` upload yields one ``text`` part containing the file's text."""
    # delete=False so the file survives the ``with`` block and can be read
    # back by name once the written content has been flushed on close.
    with tempfile.NamedTemporaryFile(
        suffix=".txt", delete=False, mode="w", encoding="utf-8"
    ) as tmp:
        tmp.write("Hemoglobin 13.1 g/dL")
        path = tmp.name
    try:
        parts = document_to_payload_parts(path)
        assert len(parts) == 1
        assert parts[0]["type"] == "text"
        assert "Hemoglobin" in parts[0]["text"]
    finally:
        # Explicit cleanup: delete=False means nothing else removes the file.
        Path(path).unlink(missing_ok=True)
def test_validate_upload_rejects_unknown_extension():
    """``validate_upload`` raises ``ValueError`` for a ``.docx`` path.

    The error message must mention "Unsupported file type".
    """
    with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp:
        path = tmp.name
    try:
        try:
            validate_upload(path)
        except ValueError as error:
            assert "Unsupported file type" in str(error)
        else:
            # Reaching here means no exception was raised at all.
            raise AssertionError("expected ValueError")
    finally:
        # Explicit cleanup: delete=False means nothing else removes the file.
        Path(path).unlink(missing_ok=True)
def test_document_intake_metadata_for_pdf():
    """Metadata for a one-page PDF reports vision modality and one page/image."""
    # delete=False so the PDF can be written and read by name afterwards.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        path = tmp.name
    try:
        document = fitz.open()
        try:
            page = document.new_page()
            page.insert_text((72, 72), "Sample")
            document.save(path)
        finally:
            # Close the document even if building or saving it fails.
            document.close()

        parts = document_to_payload_parts(path, max_pages=1)
        metadata = document_intake_metadata(path, parts)
        assert metadata["input_modality"] == "vision"
        assert metadata["pages_rendered"] == 1
        assert metadata["image_count"] == 1
    finally:
        # Explicit cleanup: delete=False means nothing else removes the file.
        Path(path).unlink(missing_ok=True)
if __name__ == "__main__":
    # Allow running this module directly, without a test runner: execute
    # every test in declaration order, then report success.
    for run_test in (
        test_png_upload_returns_image_url_part,
        test_jpeg_upload_returns_image_url_part,
        test_pdf_upload_renders_pages_to_images,
        test_text_upload_still_returns_text_part,
        test_validate_upload_rejects_unknown_extension,
        test_document_intake_metadata_for_pdf,
    ):
        run_test()
    print("test_document_processing: ok")