leonge committed on
Commit
e80c4c1
·
1 Parent(s): 5d47f86

Updated pydantic models to support lists

Browse files
Files changed (2) hide show
  1. base/ocr.py +7 -7
  2. data_models.py +17 -17
base/ocr.py CHANGED
@@ -2,7 +2,7 @@
2
  Custom types for dealing with the Google Vision API JSON output.
3
  """
4
  from enum import IntEnum
5
- from typing import Any, Optional
6
 
7
  from pydantic import BaseModel
8
 
@@ -36,7 +36,7 @@ class DetectedLanguage(BaseModel):
36
 
37
 
38
  class TextProperty(BaseModel):
39
- detectedLanguages: list[DetectedLanguage]
40
  detectedBreak: Optional[DetectedBreak]
41
 
42
 
@@ -50,21 +50,21 @@ class Symbol(BaseModel):
50
  class Word(BaseModel):
51
  property: Optional[TextProperty]
52
  boundingBox: Any
53
- symbols: list[Symbol]
54
  confidence: float
55
 
56
 
57
  class Paragraph(BaseModel):
58
  property: Optional[TextProperty]
59
  boundingBox: Any
60
- words: list[Word]
61
  confidence: float
62
 
63
 
64
  class Block(BaseModel):
65
  property: Optional[TextProperty]
66
  boundingBox: Any
67
- paragraphs: list[Paragraph]
68
  blockType: BlockType
69
  confidence: float
70
 
@@ -73,12 +73,12 @@ class Page(BaseModel):
73
  property: Optional[TextProperty]
74
  width: int
75
  height: int
76
- blocks: list[Block]
77
  confidence: float
78
 
79
 
80
  class TextAnnotation(BaseModel):
81
- pages: list[Page]
82
  text: str
83
 
84
 
 
2
  Custom types for dealing with the Google Vision API JSON output.
3
  """
4
  from enum import IntEnum
5
+ from typing import Any, Optional, List
6
 
7
  from pydantic import BaseModel
8
 
 
36
 
37
 
38
  class TextProperty(BaseModel):
39
+ detectedLanguages: List[DetectedLanguage]
40
  detectedBreak: Optional[DetectedBreak]
41
 
42
 
 
50
  class Word(BaseModel):
51
  property: Optional[TextProperty]
52
  boundingBox: Any
53
+ symbols: List[Symbol]
54
  confidence: float
55
 
56
 
57
  class Paragraph(BaseModel):
58
  property: Optional[TextProperty]
59
  boundingBox: Any
60
+ words: List[Word]
61
  confidence: float
62
 
63
 
64
  class Block(BaseModel):
65
  property: Optional[TextProperty]
66
  boundingBox: Any
67
+ paragraphs: List[Paragraph]
68
  blockType: BlockType
69
  confidence: float
70
 
 
73
  property: Optional[TextProperty]
74
  width: int
75
  height: int
76
+ blocks: List[Block]
77
  confidence: float
78
 
79
 
80
  class TextAnnotation(BaseModel):
81
+ pages: List[Page]
82
  text: str
83
 
84
 
data_models.py CHANGED
@@ -4,7 +4,7 @@ Pydantic models used throughout the codebase.
4
  In particular, these are the types that are used as input and output of each step of the pipeline.
5
  """
6
  import json
7
- from typing import Any, Optional, Union
8
 
9
  from pydantic import BaseModel, Field
10
 
@@ -65,7 +65,7 @@ class NutrientTable(BaseModel):
65
  nutrientBasisQuantityValue: Optional[str]
66
  nutrientBasisQuantityMeasurementUnitCode: Optional[str]
67
  preperationStateCode: Optional[str]
68
- values: list[NutrientTableElement]
69
 
70
  def __str__(self):
71
  top = "Nutrients per " + " ".join(
@@ -83,7 +83,7 @@ class Attribute(BaseModel):
83
  coordinates: str
84
  entity: str
85
  probability: float
86
- value: Union[str, list[NutrientTable]]
87
  model: str
88
 
89
 
@@ -116,13 +116,13 @@ class NetContentAttribute(BaseModel):
116
 
117
  class AllergensOut(BaseModel):
118
  entity: str
119
- values: list[AttributeAllergen]
120
  model: str
121
 
122
 
123
  class CommunicationChannelsOut(BaseModel):
124
  entity: str
125
- values: list[AttributeCommunicationChannel]
126
  model: str
127
 
128
 
@@ -131,7 +131,7 @@ class PipelineInput(BaseModel):
131
 
132
 
133
  class PipelineOutput(BaseModel):
134
- attributes: list[
135
  Union[Attribute, CommunicationChannelsOut, AllergensOut, NetContentAttribute]
136
  ]
137
  job_id: str = Field(alias="job-id")
@@ -147,13 +147,13 @@ class TextWithLanguage(BaseModel):
147
 
148
 
149
  class OCRTextOut(BaseModel):
150
- blocks: list[str]
151
  full_text: str
152
- sentences: list[TextWithLanguage]
153
 
154
 
155
  class OCRTableOut(BaseModel):
156
- tables: list[list[list[str]]]
157
 
158
 
159
  class OCROut(BaseModel):
@@ -162,7 +162,7 @@ class OCROut(BaseModel):
162
 
163
 
164
  class OCROutList(BaseModel):
165
- __root__: list[OCROut]
166
 
167
  def __iter__(self):
168
  return iter(self.__root__)
@@ -172,11 +172,11 @@ class OCROutList(BaseModel):
172
 
173
 
174
  class OCRWrapperOut(BaseModel):
175
- blocks: list[str]
176
  full_text: str
177
  job_id: str
178
- sentences: list[TextWithLanguage]
179
- tables: list[list[list[str]]]
180
 
181
 
182
  class ClassifiedText(BaseModel):
@@ -210,8 +210,8 @@ class NetContent(BaseModel):
210
 
211
 
212
  class ModelOut(BaseModel):
213
- blocks: list[Union[NetContent, Allergen, CommunicationChannels, ClassifiedText]]
214
- tables: Optional[list[NutrientTable]]
215
  job_id: str
216
  model: str
217
  full_text: str
@@ -221,7 +221,7 @@ class ModelOut(BaseModel):
221
 
222
 
223
  class ModelOutList(BaseModel):
224
- __root__: list[ModelOut]
225
 
226
  def __iter__(self):
227
  return iter(self.__root__)
@@ -231,6 +231,6 @@ class ModelOutList(BaseModel):
231
 
232
 
233
  class TrainModelOut(BaseModel):
234
- # To be defined later when we have a list of accepted formats
235
  model: Optional[Any] = None
236
  artifacts: Optional[Any] = None
 
4
  In particular, these are the types that are used as input and output of each step of the pipeline.
5
  """
6
  import json
7
+ from typing import Any, Optional, Union, List
8
 
9
  from pydantic import BaseModel, Field
10
 
 
65
  nutrientBasisQuantityValue: Optional[str]
66
  nutrientBasisQuantityMeasurementUnitCode: Optional[str]
67
  preperationStateCode: Optional[str]
68
+ values: List[NutrientTableElement]
69
 
70
  def __str__(self):
71
  top = "Nutrients per " + " ".join(
 
83
  coordinates: str
84
  entity: str
85
  probability: float
86
+ value: Union[str, List[NutrientTable]]
87
  model: str
88
 
89
 
 
116
 
117
  class AllergensOut(BaseModel):
118
  entity: str
119
+ values: List[AttributeAllergen]
120
  model: str
121
 
122
 
123
  class CommunicationChannelsOut(BaseModel):
124
  entity: str
125
+ values: List[AttributeCommunicationChannel]
126
  model: str
127
 
128
 
 
131
 
132
 
133
  class PipelineOutput(BaseModel):
134
+ attributes: List[
135
  Union[Attribute, CommunicationChannelsOut, AllergensOut, NetContentAttribute]
136
  ]
137
  job_id: str = Field(alias="job-id")
 
147
 
148
 
149
  class OCRTextOut(BaseModel):
150
+ blocks: List[str]
151
  full_text: str
152
+ sentences: List[TextWithLanguage]
153
 
154
 
155
  class OCRTableOut(BaseModel):
156
+ tables: List[List[List[str]]]
157
 
158
 
159
  class OCROut(BaseModel):
 
162
 
163
 
164
  class OCROutList(BaseModel):
165
+ __root__: List[OCROut]
166
 
167
  def __iter__(self):
168
  return iter(self.__root__)
 
172
 
173
 
174
  class OCRWrapperOut(BaseModel):
175
+ blocks: List[str]
176
  full_text: str
177
  job_id: str
178
+ sentences: List[TextWithLanguage]
179
+ tables: List[List[List[str]]]
180
 
181
 
182
  class ClassifiedText(BaseModel):
 
210
 
211
 
212
  class ModelOut(BaseModel):
213
+ blocks: List[Union[NetContent, Allergen, CommunicationChannels, ClassifiedText]]
214
+ tables: Optional[List[NutrientTable]]
215
  job_id: str
216
  model: str
217
  full_text: str
 
221
 
222
 
223
  class ModelOutList(BaseModel):
224
+ __root__: List[ModelOut]
225
 
226
  def __iter__(self):
227
  return iter(self.__root__)
 
231
 
232
 
233
  class TrainModelOut(BaseModel):
234
+ # To be defined later when we have a list of accepted formats
235
  model: Optional[Any] = None
236
  artifacts: Optional[Any] = None