File size: 7,100 Bytes
5539271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc59214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5539271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
"""Tests for document_service — upload, preview, page counting, and deletion."""

from __future__ import annotations

import os
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from domain.models import Document
from services import document_service


class TestUploadValidation:
    """Validation and persistence behaviour of ``document_service.upload``.

    Every test calls ``upload(filename, content_type, content)`` directly.
    Tests that get as far as storing the file point ``UPLOAD_DIR`` at
    pytest's ``tmp_path``; success-path tests also replace
    ``persistence.document_repo.insert`` with an ``AsyncMock`` and patch
    ``_count_pages`` so the reported page count is fixed by the test.
    """

    # Payload that the success-path tests below upload without a validation error.
    _FAKE_PDF = b"%PDF-1.4 fake pdf content"

    @pytest.mark.asyncio
    async def test_rejects_oversized_file(self):
        """Content one byte over ``MAX_FILE_SIZE`` raises ``ValueError``."""
        content = b"x" * (document_service.MAX_FILE_SIZE + 1)
        with pytest.raises(ValueError, match="File too large"):
            await document_service.upload("big.pdf", "application/pdf", content)

    @pytest.mark.asyncio
    async def test_rejects_non_pdf(self):
        """Non-PDF bytes are rejected even with a .pdf name and PDF content type."""
        content = b"NOT-A-PDF-FILE"
        with pytest.raises(ValueError, match="not a PDF"):
            await document_service.upload("fake.pdf", "application/pdf", content)

    @pytest.mark.asyncio
    async def test_rejects_too_many_pages(self, tmp_path, monkeypatch):
        """A page count above ``MAX_PAGE_COUNT`` raises and leaves no file behind."""
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path))
        monkeypatch.setattr(document_service, "MAX_PAGE_COUNT", 20)

        with patch.object(document_service, "_count_pages", return_value=40):
            with pytest.raises(ValueError, match="Too many pages"):
                await document_service.upload(
                    "big.pdf", "application/pdf", self._FAKE_PDF
                )

        # Nothing may remain in the upload directory after the rejection.
        assert not os.listdir(tmp_path)

    @pytest.mark.asyncio
    async def test_allows_pdf_under_page_limit(self, tmp_path, monkeypatch):
        """A page count below ``MAX_PAGE_COUNT`` is accepted and persisted."""
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path))
        monkeypatch.setattr(document_service, "MAX_PAGE_COUNT", 20)

        mock_insert = AsyncMock()
        with (
            patch("persistence.document_repo.insert", mock_insert),
            patch.object(document_service, "_count_pages", return_value=15),
        ):
            doc = await document_service.upload(
                "ok.pdf", "application/pdf", self._FAKE_PDF
            )

        assert doc.page_count == 15
        mock_insert.assert_called_once()

    @pytest.mark.asyncio
    async def test_unlimited_pages_when_zero(self, tmp_path, monkeypatch):
        """With ``MAX_PAGE_COUNT`` set to 0, a 100-page upload is accepted."""
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path))
        monkeypatch.setattr(document_service, "MAX_PAGE_COUNT", 0)

        mock_insert = AsyncMock()
        with (
            patch("persistence.document_repo.insert", mock_insert),
            patch.object(document_service, "_count_pages", return_value=100),
        ):
            doc = await document_service.upload(
                "big.pdf", "application/pdf", self._FAKE_PDF
            )

        assert doc.page_count == 100
        # Same persistence check as the other success-path tests: accepting the
        # upload must also store the document record.
        mock_insert.assert_called_once()

    @pytest.mark.asyncio
    async def test_accepts_valid_pdf(self, tmp_path, monkeypatch):
        """A valid upload returns populated metadata and writes the bytes to disk."""
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path))

        content = self._FAKE_PDF
        mock_insert = AsyncMock()
        with (
            patch("persistence.document_repo.insert", mock_insert),
            patch.object(document_service, "_count_pages", return_value=5),
        ):
            doc = await document_service.upload("test.pdf", "application/pdf", content)

        assert doc.filename == "test.pdf"
        assert doc.file_size == len(content)
        assert doc.page_count == 5
        mock_insert.assert_called_once()

        # Verify file was actually written to disk
        assert os.path.exists(doc.storage_path)
        with open(doc.storage_path, "rb") as f:
            assert f.read() == content


class TestGeneratePreview:
    """Checks for ``document_service.generate_preview`` with ``convert_from_bytes`` patched."""

    def test_raises_on_invalid_page(self):
        """An empty conversion result makes generate_preview raise ValueError for page 1."""
        no_pages = patch("services.document_service.convert_from_bytes", return_value=[])
        with no_pages:
            with pytest.raises(ValueError, match="Page 1 not found"):
                document_service.generate_preview(b"%PDF-fake", page=1)

    def test_returns_png_bytes(self):
        """The bytes written by the page image's save() are returned unchanged."""

        # Stand-in for the image's save(): writes a fixed payload into the buffer.
        # The parameter is named ``format`` so the call also works by keyword.
        def write_png(buf, format):
            return buf.write(b"PNG-DATA")

        page_image = MagicMock()
        page_image.save = MagicMock(side_effect=write_png)

        with patch(
            "services.document_service.convert_from_bytes",
            return_value=[page_image],
        ):
            png = document_service.generate_preview(b"%PDF-fake", page=1, dpi=72)

        assert png == b"PNG-DATA"


class TestCountPages:
    """Checks for ``document_service._count_pages`` with ``pdfinfo_from_bytes`` patched."""

    # Dotted path of the function both tests replace.
    _PDFINFO_TARGET = "services.document_service.pdfinfo_from_bytes"

    def test_returns_page_count(self):
        """The "Pages" value reported by pdfinfo_from_bytes is returned."""
        with patch(self._PDFINFO_TARGET, return_value={"Pages": 42}):
            pages = document_service._count_pages(b"pdf")
        assert pages == 42

    def test_returns_none_on_error(self):
        """A FileNotFoundError from pdfinfo_from_bytes yields None instead of raising."""
        missing_poppler = FileNotFoundError("poppler not found")
        with patch(self._PDFINFO_TARGET, side_effect=missing_poppler):
            pages = document_service._count_pages(b"pdf")
        assert pages is None


class TestDelete:
    """Behaviour of ``document_service.delete`` with the repositories mocked.

    ``persistence.document_repo`` and ``persistence.analysis_repo`` functions
    are replaced with ``AsyncMock`` objects; files are real and live under
    pytest's ``tmp_path`` so on-disk effects can be observed.
    """

    @pytest.mark.asyncio
    async def test_delete_removes_file_and_records(self, tmp_path, monkeypatch):
        """Deleting a stored document removes its file and calls both repositories."""
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(tmp_path))

        # A real file inside UPLOAD_DIR so its removal can be checked on disk.
        fake_file = tmp_path / "test.pdf"
        fake_file.write_bytes(b"content")

        doc = Document(
            id="doc-1",
            filename="test.pdf",
            storage_path=str(fake_file),
        )

        # Named mocks so the "records" half of the test can be asserted.
        mock_delete_analyses = AsyncMock(return_value=2)
        mock_delete_document = AsyncMock(return_value=True)
        with (
            patch("persistence.document_repo.find_by_id", AsyncMock(return_value=doc)),
            patch("persistence.analysis_repo.delete_by_document", mock_delete_analyses),
            patch("persistence.document_repo.delete", mock_delete_document),
        ):
            result = await document_service.delete("doc-1")

        assert result is True
        assert not fake_file.exists()
        mock_delete_analyses.assert_called_once()
        mock_delete_document.assert_called_once()

    @pytest.mark.asyncio
    async def test_delete_refuses_file_outside_upload_dir(self, tmp_path, monkeypatch):
        """Files outside UPLOAD_DIR should not be deleted (path traversal protection)."""
        upload_dir = tmp_path / "uploads"
        monkeypatch.setattr(document_service, "UPLOAD_DIR", str(upload_dir))
        os.makedirs(upload_dir, exist_ok=True)

        # The stored path points at a sibling of the upload dir, not inside it.
        outside_file = tmp_path / "secret.txt"
        outside_file.write_bytes(b"secret")

        doc = Document(id="doc-1", filename="x.pdf", storage_path=str(outside_file))

        with (
            patch("persistence.document_repo.find_by_id", AsyncMock(return_value=doc)),
            patch("persistence.analysis_repo.delete_by_document", AsyncMock(return_value=0)),
            patch("persistence.document_repo.delete", AsyncMock(return_value=True)),
        ):
            await document_service.delete("doc-1")

        # File should NOT have been deleted
        assert outside_file.exists()

    @pytest.mark.asyncio
    async def test_delete_not_found_returns_false(self):
        """When ``find_by_id`` returns None, ``delete`` returns False."""
        with patch("persistence.document_repo.find_by_id", AsyncMock(return_value=None)):
            result = await document_service.delete("missing")
        assert result is False