Spaces:
Running
Running
File size: 7,100 Bytes
"""Tests for document_service — upload, preview, page counting, and deletion."""
from __future__ import annotations
import os
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from domain.models import Document
from services import document_service
class TestUploadValidation:
    """Validation rules applied by ``document_service.upload``.

    Every test redirects ``UPLOAD_DIR`` to pytest's ``tmp_path`` so that no
    test can leave files in the real upload directory, regardless of the
    order in which the service validates and writes.
    """

    # Payload the success tests expect ``upload`` to accept. ``_count_pages``
    # is patched wherever this is used, so no real PDF structure is needed.
    FAKE_PDF = b"%PDF-1.4 fake pdf content"

    @pytest.mark.asyncio
    async def test_rejects_oversized_file(self, tmp_path, monkeypatch):
        """A payload one byte over ``MAX_FILE_SIZE`` raises ``ValueError``."""
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path))
        content = b"x" * (document_service.MAX_FILE_SIZE + 1)

        with pytest.raises(ValueError, match="File too large"):
            await document_service.upload("big.pdf", "application/pdf", content)

    @pytest.mark.asyncio
    async def test_rejects_non_pdf(self, tmp_path, monkeypatch):
        """Content is rejected as "not a PDF" despite a PDF name and MIME type."""
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path))
        content = b"NOT-A-PDF-FILE"

        with pytest.raises(ValueError, match="not a PDF"):
            await document_service.upload("fake.pdf", "application/pdf", content)

    @pytest.mark.asyncio
    async def test_rejects_too_many_pages(self, tmp_path, monkeypatch):
        """A page count above ``MAX_PAGE_COUNT`` is rejected and leaves no file."""
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path))
        monkeypatch.setattr(document_service, "MAX_PAGE_COUNT", 20)

        with patch.object(document_service, "_count_pages", return_value=40):
            with pytest.raises(ValueError, match="Too many pages"):
                await document_service.upload(
                    "big.pdf", "application/pdf", self.FAKE_PDF
                )

        # Verify temp file was cleaned up. Comparing against [] makes a
        # failure report list the leftover file names.
        assert os.listdir(tmp_path) == []

    @pytest.mark.asyncio
    async def test_allows_pdf_under_page_limit(self, tmp_path, monkeypatch):
        """A page count below ``MAX_PAGE_COUNT`` is accepted and persisted."""
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path))
        monkeypatch.setattr(document_service, "MAX_PAGE_COUNT", 20)
        mock_insert = AsyncMock()

        with (
            patch("persistence.document_repo.insert", mock_insert),
            patch.object(document_service, "_count_pages", return_value=15),
        ):
            doc = await document_service.upload(
                "ok.pdf", "application/pdf", self.FAKE_PDF
            )

        assert doc.page_count == 15
        mock_insert.assert_called_once()

    @pytest.mark.asyncio
    async def test_unlimited_pages_when_zero(self, tmp_path, monkeypatch):
        """``MAX_PAGE_COUNT == 0`` lets a 100-page document through."""
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path))
        monkeypatch.setattr(document_service, "MAX_PAGE_COUNT", 0)
        mock_insert = AsyncMock()

        with (
            patch("persistence.document_repo.insert", mock_insert),
            patch.object(document_service, "_count_pages", return_value=100),
        ):
            doc = await document_service.upload(
                "big.pdf", "application/pdf", self.FAKE_PDF
            )

        assert doc.page_count == 100
        # Same persistence check as the other success-path tests, so an
        # upload that returns a document without storing it is caught here.
        mock_insert.assert_called_once()

    @pytest.mark.asyncio
    async def test_accepts_valid_pdf(self, tmp_path, monkeypatch):
        """A valid upload returns matching metadata and writes the bytes to disk."""
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path))
        mock_insert = AsyncMock()
        content = self.FAKE_PDF

        with (
            patch("persistence.document_repo.insert", mock_insert),
            patch.object(document_service, "_count_pages", return_value=5),
        ):
            doc = await document_service.upload("test.pdf", "application/pdf", content)

        assert doc.filename == "test.pdf"
        assert doc.file_size == len(content)
        assert doc.page_count == 5
        mock_insert.assert_called_once()

        # Verify file was actually written to disk
        assert os.path.exists(doc.storage_path)
        with open(doc.storage_path, "rb") as f:
            assert f.read() == content
class TestGeneratePreview:
    """Checks for ``document_service.generate_preview`` with conversion patched."""

    def test_raises_on_invalid_page(self):
        """An empty conversion result turns a request for page 1 into ValueError."""
        no_pages = patch(
            "services.document_service.convert_from_bytes", return_value=[]
        )
        with no_pages:
            with pytest.raises(ValueError, match="Page 1 not found"):
                document_service.generate_preview(b"%PDF-fake", page=1)

    def test_returns_png_bytes(self):
        """The bytes written by the page image's ``save`` call are returned."""

        def write_png(buf, format):
            # The parameter must stay named ``format`` so the stub accepts
            # the argument by keyword as well as by position.
            return buf.write(b"PNG-DATA")

        fake_page = MagicMock()
        fake_page.save = MagicMock(side_effect=write_png)

        with patch(
            "services.document_service.convert_from_bytes",
            return_value=[fake_page],
        ):
            png = document_service.generate_preview(b"%PDF-fake", page=1, dpi=72)

        assert png == b"PNG-DATA"
class TestCountPages:
    """Checks for ``document_service._count_pages`` with pdfinfo patched out."""

    def test_returns_page_count(self):
        """The ``Pages`` entry of the patched pdfinfo result is returned."""
        target = "services.document_service.pdfinfo_from_bytes"
        fake_info = {"Pages": 42}

        with patch(target, return_value=fake_info):
            pages = document_service._count_pages(b"pdf")

        assert pages == 42

    def test_returns_none_on_error(self):
        """A ``FileNotFoundError`` from pdfinfo yields ``None`` instead of raising."""
        target = "services.document_service.pdfinfo_from_bytes"
        failure = FileNotFoundError("poppler not found")

        with patch(target, side_effect=failure):
            pages = document_service._count_pages(b"pdf")

        assert pages is None
class TestDelete:
    """Behaviour of ``document_service.delete`` with the repositories mocked."""

    @pytest.mark.asyncio
    async def test_delete_removes_file_and_records(self, tmp_path, monkeypatch):
        """Deleting a stored document removes its file and its database records."""
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path))

        # Create a fake file inside the (patched) upload directory
        fake_file = tmp_path / "test.pdf"
        fake_file.write_bytes(b"content")

        doc = Document(
            id="doc-1",
            filename="test.pdf",
            storage_path=str(fake_file),
        )
        delete_analyses = AsyncMock(return_value=2)
        delete_document = AsyncMock(return_value=True)

        with (
            patch("persistence.document_repo.find_by_id", AsyncMock(return_value=doc)),
            patch("persistence.analysis_repo.delete_by_document", delete_analyses),
            patch("persistence.document_repo.delete", delete_document),
        ):
            result = await document_service.delete("doc-1")

        assert result is True
        assert not fake_file.exists()
        # The test name promises record removal as well, so confirm both
        # repository deletions were actually awaited.
        delete_analyses.assert_awaited_once()
        delete_document.assert_awaited_once()

    @pytest.mark.asyncio
    async def test_delete_refuses_file_outside_upload_dir(self, tmp_path, monkeypatch):
        """Files outside UPLOAD_DIR should not be deleted (path traversal protection)."""
        upload_dir = tmp_path / "uploads"
        upload_dir.mkdir(exist_ok=True)
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(upload_dir))

        # File is outside the upload dir (a sibling of it under tmp_path)
        outside_file = tmp_path / "secret.txt"
        outside_file.write_bytes(b"secret")

        doc = Document(id="doc-1", filename="x.pdf", storage_path=str(outside_file))

        with (
            patch("persistence.document_repo.find_by_id", AsyncMock(return_value=doc)),
            patch("persistence.analysis_repo.delete_by_document", AsyncMock(return_value=0)),
            patch("persistence.document_repo.delete", AsyncMock(return_value=True)),
        ):
            await document_service.delete("doc-1")

        # File should NOT have been deleted
        assert outside_file.exists()

    @pytest.mark.asyncio
    async def test_delete_not_found_returns_false(self):
        """``delete`` returns ``False`` when the repository finds no document."""
        with patch("persistence.document_repo.find_by_id", AsyncMock(return_value=None)):
            result = await document_service.delete("missing")

        assert result is False
|