"""Tests for document_service — upload, preview, page counting, and deletion.""" from __future__ import annotations import os from unittest.mock import AsyncMock, MagicMock, patch import pytest from domain.models import Document from services import document_service class TestUploadValidation: @pytest.mark.asyncio async def test_rejects_oversized_file(self): content = b"x" * (document_service.MAX_FILE_SIZE + 1) with pytest.raises(ValueError, match="File too large"): await document_service.upload("big.pdf", "application/pdf", content) @pytest.mark.asyncio async def test_rejects_non_pdf(self): content = b"NOT-A-PDF-FILE" with pytest.raises(ValueError, match="not a PDF"): await document_service.upload("fake.pdf", "application/pdf", content) @pytest.mark.asyncio async def test_rejects_too_many_pages(self, tmp_path, monkeypatch): monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path)) monkeypatch.setattr(document_service, "MAX_PAGE_COUNT", 20) with patch.object(document_service, "_count_pages", return_value=40): content = b"%PDF-1.4 fake pdf content" with pytest.raises(ValueError, match="Too many pages"): await document_service.upload("big.pdf", "application/pdf", content) # Verify temp file was cleaned up assert len(os.listdir(tmp_path)) == 0 @pytest.mark.asyncio async def test_allows_pdf_under_page_limit(self, tmp_path, monkeypatch): monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path)) monkeypatch.setattr(document_service, "MAX_PAGE_COUNT", 20) mock_insert = AsyncMock() with ( patch("persistence.document_repo.insert", mock_insert), patch.object(document_service, "_count_pages", return_value=15), ): content = b"%PDF-1.4 fake pdf content" doc = await document_service.upload("ok.pdf", "application/pdf", content) assert doc.page_count == 15 mock_insert.assert_called_once() @pytest.mark.asyncio async def test_unlimited_pages_when_zero(self, tmp_path, monkeypatch): monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path)) monkeypatch.setattr(document_service, "MAX_PAGE_COUNT", 0) mock_insert = AsyncMock() with ( patch("persistence.document_repo.insert", mock_insert), patch.object(document_service, "_count_pages", return_value=100), ): content = b"%PDF-1.4 fake pdf content" doc = await document_service.upload("big.pdf", "application/pdf", content) assert doc.page_count == 100 @pytest.mark.asyncio async def test_accepts_valid_pdf(self, tmp_path, monkeypatch): monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path)) mock_insert = AsyncMock() with ( patch("persistence.document_repo.insert", mock_insert), patch.object(document_service, "_count_pages", return_value=5), ): content = b"%PDF-1.4 fake pdf content" doc = await document_service.upload("test.pdf", "application/pdf", content) assert doc.filename == "test.pdf" assert doc.file_size == len(content) assert doc.page_count == 5 mock_insert.assert_called_once() # Verify file was actually written to disk assert os.path.exists(doc.storage_path) with open(doc.storage_path, "rb") as f: assert f.read() == content class TestGeneratePreview: def test_raises_on_invalid_page(self): """generate_preview should raise ValueError when page is out of range.""" with ( patch("services.document_service.convert_from_bytes", return_value=[]), pytest.raises(ValueError, match="Page 1 not found"), ): document_service.generate_preview(b"%PDF-fake", page=1) def test_returns_png_bytes(self): """generate_preview should return PNG bytes from pdf2image.""" mock_image = MagicMock() mock_image.save = MagicMock(side_effect=lambda buf, format: buf.write(b"PNG-DATA")) with patch("services.document_service.convert_from_bytes", return_value=[mock_image]): result = document_service.generate_preview(b"%PDF-fake", page=1, dpi=72) assert result == b"PNG-DATA" class TestCountPages: def test_returns_page_count(self): with patch( "services.document_service.pdfinfo_from_bytes", return_value={"Pages": 42}, ): assert document_service._count_pages(b"pdf") == 42 def test_returns_none_on_error(self): with patch( "services.document_service.pdfinfo_from_bytes", side_effect=FileNotFoundError("poppler not found"), ): assert document_service._count_pages(b"pdf") is None class TestDelete: @pytest.mark.asyncio async def test_delete_removes_file_and_records(self, tmp_path, monkeypatch): monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path)) # Create a fake file fake_file = tmp_path / "test.pdf" fake_file.write_bytes(b"content") doc = Document( id="doc-1", filename="test.pdf", storage_path=str(fake_file), ) with ( patch("persistence.document_repo.find_by_id", AsyncMock(return_value=doc)), patch("persistence.analysis_repo.delete_by_document", AsyncMock(return_value=2)), patch("persistence.document_repo.delete", AsyncMock(return_value=True)), ): result = await document_service.delete("doc-1") assert result is True assert not fake_file.exists() @pytest.mark.asyncio async def test_delete_refuses_file_outside_upload_dir(self, tmp_path, monkeypatch): """Files outside UPLOAD_DIR should not be deleted (path traversal protection).""" monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path / "uploads")) os.makedirs(tmp_path / "uploads", exist_ok=True) # File is outside the upload dir outside_file = tmp_path / "secret.txt" outside_file.write_bytes(b"secret") doc = Document(id="doc-1", filename="x.pdf", storage_path=str(outside_file)) with ( patch("persistence.document_repo.find_by_id", AsyncMock(return_value=doc)), patch("persistence.analysis_repo.delete_by_document", AsyncMock(return_value=0)), patch("persistence.document_repo.delete", AsyncMock(return_value=True)), ): await document_service.delete("doc-1") # File should NOT have been deleted assert outside_file.exists() @pytest.mark.asyncio async def test_delete_not_found_returns_false(self): with patch("persistence.document_repo.find_by_id", AsyncMock(return_value=None)): result = await document_service.delete("missing") assert result is False