File size: 5,139 Bytes
9f97a7a
 
 
 
 
 
e80c4c1
9f97a7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e80c4c1
9f97a7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e80c4c1
9f97a7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e80c4c1
9f97a7a
 
 
 
 
e80c4c1
9f97a7a
 
 
 
 
 
 
 
e80c4c1
9f97a7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e80c4c1
9f97a7a
e80c4c1
9f97a7a
 
 
e80c4c1
9f97a7a
 
 
 
 
 
 
 
e80c4c1
9f97a7a
 
 
 
 
 
 
 
 
e80c4c1
9f97a7a
 
e80c4c1
 
9f97a7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e80c4c1
 
9f97a7a
 
 
 
 
 
 
 
 
e80c4c1
9f97a7a
 
 
 
 
 
 
 
 
e80c4c1
9f97a7a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
"""
Pydantic models used throughout the codebase.

In particular, these are the types that are used as input and output of each step of the pipeline.
"""
import json
from typing import Any, Optional, Union, List

from pydantic import BaseModel, Field


# Model with two required string fields: `pipeline_arn` and `job_id`.
# NOTE(review): presumably the input of a redirect step -- confirm with the caller.
class RedirectInput(BaseModel):
    pipeline_arn: str
    job_id: str


# A quantity described by a unit code, a value and a precision code (all strings).
class NutrientTableQuantity(BaseModel):
    measurementUnitCode: str
    value: str
    precisionCode: str

    def __str__(self):
        """Render as "<precisionCode> <value> <measurementUnitCode>"."""
        pieces = (self.precisionCode, self.value, self.measurementUnitCode)
        return " ".join(format(piece) for piece in pieces)


# A daily-value percentage described by a value and a precision code (both strings).
class NutrientTableDailyValueIntake(BaseModel):
    value: str
    precisionCode: str

    def __str__(self):
        """Render as "<precisionCode> <value>%"."""
        rendered = " ".join(format(piece) for piece in (self.precisionCode, self.value))
        return rendered + "%"


def s(
    text: Optional[Union[NutrientTableQuantity, NutrientTableDailyValueIntake, str]]
) -> str:
    """
    Render an optional value for display.

    Any falsy argument (None, but also an empty string) is shown as "?";
    every other argument is returned as ``str(text)``.
    """
    return str(text) if text else "?"


# One entry of a nutrient table: nutrient type code, contained quantity and an
# optional daily-value percentage, plus coordinates, probability and precision code.
class NutrientTableElement(BaseModel):
    coordinates: str
    probability: float
    nutrientTypeCode: Optional[str]
    quantityContained: NutrientTableQuantity
    dailyValueIntakePercent: Optional[NutrientTableDailyValueIntake]
    precisionCode: str

    def __str__(self):
        """Render as "<nutrient type> <quantity> (<daily value>)"; missing parts show as "?"."""
        nutrient = s(self.nutrientTypeCode)
        quantity = s(self.quantityContained)
        daily_value = s(self.dailyValueIntakePercent)
        return f"{nutrient} {quantity} ({daily_value})"


# A nutrient table: an optional basis quantity (value + unit code), an optional
# preparation state code and the list of table entries.
# NOTE(review): `preperationStateCode` looks like a misspelling of "preparation";
# it is a field name of the model, so it is kept unchanged here.
class NutrientTable(BaseModel):
    nutrientBasisQuantityValue: Optional[str]
    nutrientBasisQuantityMeasurementUnitCode: Optional[str]
    preperationStateCode: Optional[str]
    values: List[NutrientTableElement]

    def __str__(self):
        """
        Render a header line followed by one tab-indented line per entry.

        Missing header parts are shown as "?".
        """
        basis_value = s(self.nutrientBasisQuantityValue)
        basis_unit = s(self.nutrientBasisQuantityMeasurementUnitCode)
        state = s(self.preperationStateCode)
        header = f"Nutrients per {basis_value} {basis_unit} ({state})"
        # The separator after the header is emitted even when there are no entries.
        rows = "\n\t".join(map(str, self.values))
        return header + "\n\t" + rows


# Generic attribute model with coordinates, entity, probability, value and model.
class Attribute(BaseModel):
    coordinates: str
    entity: str
    probability: float
    # Either a plain string or a list of nutrient tables.
    value: Union[str, List[NutrientTable]]
    model: str


# Communication-channel attribute: a channel code and its value, alongside
# coordinates, probability, model and entity.
class AttributeCommunicationChannel(BaseModel):
    coordinates: str
    probability: float
    model: str
    entity: str
    communicationChannelCode: str
    communicationValue: str


# Allergen attribute: an allergen type code and a level-of-containment code,
# alongside coordinates, probability, model and entity.
class AttributeAllergen(BaseModel):
    coordinates: str
    probability: float
    model: str
    entity: str
    allergenTypeCode: str
    levelOfContainmentCode: str


# Net-content attribute: a measurement unit code and a string value, alongside
# coordinates, probability, model and entity.
class NetContentAttribute(BaseModel):
    coordinates: str
    probability: float
    model: str
    entity: str
    measurementUnitCode: str
    value: str


# Groups a list of AttributeAllergen items under one entity and model name.
class AllergensOut(BaseModel):
    entity: str
    values: List[AttributeAllergen]
    model: str


# Groups a list of AttributeCommunicationChannel items under one entity and model name.
class CommunicationChannelsOut(BaseModel):
    entity: str
    values: List[AttributeCommunicationChannel]
    model: str


# Model with a single required string field, `image_key`.
# NOTE(review): presumably a storage key of the image to process -- confirm.
class PipelineInput(BaseModel):
    image_key: str


# Model holding a list of attribute models, a job id and a text string.
class PipelineOutput(BaseModel):
    # pydantic (v1) validates a Union left to right and keeps the first member
    # that accepts the value. Anything valid as NetContentAttribute is also valid
    # as Attribute (same required fields, plus measurementUnitCode), so the more
    # specific NetContentAttribute must come first; otherwise it is coerced to
    # Attribute and measurementUnitCode is silently dropped.
    attributes: List[
        Union[NetContentAttribute, Attribute, CommunicationChannelsOut, AllergensOut]
    ]
    # Exposed under the alias "job-id"; the Config below also accepts `job_id`.
    job_id: str = Field(alias="job-id")
    text: str

    class Config:
        # Allow populating aliased fields by their Python field name as well.
        allow_population_by_field_name = True


# A piece of text paired with a language code.
class TextWithLanguage(BaseModel):
    text: str
    lang_code: str


# Text-style OCR result: a list of block strings, the full text and a list of
# sentences, each tagged with a language code.
class OCRTextOut(BaseModel):
    blocks: List[str]
    full_text: str
    sentences: List[TextWithLanguage]


# Table-style OCR result: a three-level nested list of strings.
# NOTE(review): presumably tables -> rows -> cells -- confirm with the producer.
class OCRTableOut(BaseModel):
    tables: List[List[List[str]]]


# One OCR result (either text-style or table-style) together with its job id.
class OCROut(BaseModel):
    result: Union[OCRTextOut, OCRTableOut]
    job_id: str


# Custom-root model: the model's whole content is a list of OCROut (`__root__`).
class OCROutList(BaseModel):
    __root__: List[OCROut]

    def __iter__(self):
        # Iterate over the wrapped OCROut items.
        return iter(self.__root__)

    def __getitem__(self, item):
        # Delegate indexing to the wrapped list.
        return self.__root__[item]


# Combines the fields of OCRTextOut (blocks, full_text, sentences) and
# OCRTableOut (tables) with a job id in a single flat model.
class OCRWrapperOut(BaseModel):
    blocks: List[str]
    full_text: str
    job_id: str
    sentences: List[TextWithLanguage]
    tables: List[List[List[str]]]


# A text with the attribute assigned to it and a confidence value.
class ClassifiedText(BaseModel):
    text: str
    attribute: str
    confidence: float


# Communication-channel block: channel code and value with an attribute name
# and a confidence value.
class CommunicationChannels(BaseModel):
    confidence: float
    attribute: str
    communicationChannelCode: str
    communicationValue: str
    # Optional; defaults to an empty string when not supplied.
    text: Optional[str] = ""


# Allergen block: allergen type code and level-of-containment code with an
# attribute name and a confidence value.
class Allergen(BaseModel):
    confidence: float
    attribute: str
    allergenTypeCode: str
    levelOfContainmentCode: str
    # Optional; defaults to an empty string when not supplied.
    text: Optional[str] = ""


# Net-content block: measurement unit code and string value with an attribute
# name and a confidence value.
class NetContent(BaseModel):
    confidence: float
    attribute: str
    measurementUnitCode: str
    value: str
    # Optional; defaults to an empty string when not supplied.
    text: Optional[str] = ""


# Output of one model: classified blocks, optional nutrient tables, the job id,
# the model name and the full text.
class ModelOut(BaseModel):
    # NOTE(review): ClassifiedText is listed last, presumably so the more
    # specific block models are tried first during validation -- confirm.
    blocks: List[Union[NetContent, Allergen, CommunicationChannels, ClassifiedText]]
    tables: Optional[List[NutrientTable]]
    job_id: str
    model: str
    full_text: str

    def toJSON(self):
        """
        Serialize this object to a JSON string with sorted keys.

        Objects the json module cannot encode natively (this model and any
        nested objects) are replaced by their ``__dict__``.
        """

        def _attributes_of(obj):
            # Fallback encoder: expose the instance attributes as a plain dict.
            return obj.__dict__

        return json.dumps(self, default=_attributes_of, sort_keys=True)


# Custom-root model: the model's whole content is a list of ModelOut (`__root__`).
class ModelOutList(BaseModel):
    __root__: List[ModelOut]

    def __iter__(self):
        # Iterate over the wrapped ModelOut items.
        return iter(self.__root__)

    def __getitem__(self, item):
        # Delegate indexing to the wrapped list.
        return self.__root__[item]


# Placeholder model with two optional, untyped fields that both default to None.
class TrainModelOut(BaseModel):
    # To be defined later when we have a List of accepted formats
    model: Optional[Any] = None
    artifacts: Optional[Any] = None