File size: 5,094 Bytes
5539271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
"""Tests for API schemas — camelCase serialization and validation."""

import pytest

from api.schemas import (
    AnalysisResponse,
    CreateAnalysisRequest,
    DocumentResponse,
    PipelineOptionsRequest,
    _to_camel,
)


class TestToCamel:
    def test_single_word(self):
        assert _to_camel("status") == "status"

    def test_two_words(self):
        assert _to_camel("document_id") == "documentId"

    def test_three_words(self):
        assert _to_camel("content_html_raw") == "contentHtmlRaw"

    def test_already_camel(self):
        assert _to_camel("documentId") == "documentId"


class TestDocumentResponse:
    def test_serializes_to_camel_case(self):
        doc = DocumentResponse(
            id="1",
            filename="test.pdf",
            content_type="application/pdf",
            file_size=1024,
            page_count=5,
            created_at="2024-01-01",
        )
        data = doc.model_dump(by_alias=True)
        assert "contentType" in data
        assert "fileSize" in data
        assert "pageCount" in data
        assert "createdAt" in data
        # Original snake_case should not appear
        assert "content_type" not in data
        assert "file_size" not in data

    def test_optional_fields_default_to_none(self):
        doc = DocumentResponse(id="1", filename="test.pdf", created_at="2024-01-01")
        assert doc.content_type is None
        assert doc.file_size is None
        assert doc.page_count is None


class TestAnalysisResponse:
    def test_serializes_to_camel_case(self):
        resp = AnalysisResponse(
            id="1",
            document_id="d1",
            status="COMPLETED",
            content_markdown="# Hello",
            pages_json="[]",
            created_at="2024-01-01",
        )
        data = resp.model_dump(by_alias=True)
        assert "documentId" in data
        assert "contentMarkdown" in data
        assert "pagesJson" in data
        assert "errorMessage" in data
        assert "startedAt" in data
        assert "completedAt" in data

    def test_populate_by_name(self):
        """Can create using snake_case field names."""
        resp = AnalysisResponse(
            id="1",
            document_id="d1",
            status="PENDING",
            created_at="2024-01-01",
        )
        assert resp.document_id == "d1"


class TestPipelineOptionsRequest:
    def test_defaults(self):
        opts = PipelineOptionsRequest()
        assert opts.do_ocr is True
        assert opts.do_table_structure is True
        assert opts.table_mode == "accurate"
        assert opts.do_code_enrichment is False
        assert opts.do_formula_enrichment is False
        assert opts.do_picture_classification is False
        assert opts.do_picture_description is False
        assert opts.generate_picture_images is False
        assert opts.generate_page_images is False
        assert opts.images_scale == 1.0

    def test_custom_values(self):
        opts = PipelineOptionsRequest(
            do_ocr=False,
            table_mode="fast",
            do_code_enrichment=True,
            images_scale=2.0,
        )
        assert opts.do_ocr is False
        assert opts.table_mode == "fast"
        assert opts.do_code_enrichment is True
        assert opts.images_scale == 2.0

    def test_model_dump(self):
        opts = PipelineOptionsRequest(do_ocr=False)
        data = opts.model_dump()
        assert data["do_ocr"] is False
        assert data["do_table_structure"] is True  # default preserved

    def test_invalid_table_mode_rejected(self):
        with pytest.raises(ValueError, match='table_mode must be "accurate" or "fast"'):
            PipelineOptionsRequest(table_mode="invalid")

    def test_negative_images_scale_rejected(self):
        with pytest.raises(ValueError, match="images_scale must be between"):
            PipelineOptionsRequest(images_scale=-1.0)

    def test_zero_images_scale_rejected(self):
        with pytest.raises(ValueError, match="images_scale must be between"):
            PipelineOptionsRequest(images_scale=0)

    def test_excessive_images_scale_rejected(self):
        with pytest.raises(ValueError, match="images_scale must be between"):
            PipelineOptionsRequest(images_scale=11.0)

    def test_boundary_images_scale_accepted(self):
        opts = PipelineOptionsRequest(images_scale=0.1)
        assert opts.images_scale == 0.1
        opts2 = PipelineOptionsRequest(images_scale=10.0)
        assert opts2.images_scale == 10.0


class TestCreateAnalysisRequest:
    def test_parses_document_id(self):
        req = CreateAnalysisRequest(documentId="doc-42")
        assert req.documentId == "doc-42"
        assert req.pipelineOptions is None

    def test_parses_with_pipeline_options(self):
        req = CreateAnalysisRequest(
            documentId="doc-42",
            pipelineOptions=PipelineOptionsRequest(do_ocr=False, table_mode="fast"),
        )
        assert req.documentId == "doc-42"
        assert req.pipelineOptions.do_ocr is False
        assert req.pipelineOptions.table_mode == "fast"