Spaces:

WoC
/

retailpxdemo

Build error

App Files

leonge committed on Feb 6, 2023

Commit

e80c4c1

1 Parent(s): 5d47f86

Updated pydantic models to support lists

Browse files

Files changed (2) hide show

base/ocr.py +7 -7
data_models.py +17 -17

base/ocr.py CHANGED Viewed

@@ -2,7 +2,7 @@
 Custom types for dealing with the Google Vision API JSON output.
 """
 from enum import IntEnum
-from typing import Any, Optional
 from pydantic import BaseModel
@@ -36,7 +36,7 @@ class DetectedLanguage(BaseModel):
 class TextProperty(BaseModel):
-    detectedLanguages: list[DetectedLanguage]
     detectedBreak: Optional[DetectedBreak]
@@ -50,21 +50,21 @@ class Symbol(BaseModel):
 class Word(BaseModel):
     property: Optional[TextProperty]
     boundingBox: Any
-    symbols: list[Symbol]
     confidence: float
 class Paragraph(BaseModel):
     property: Optional[TextProperty]
     boundingBox: Any
-    words: list[Word]
     confidence: float
 class Block(BaseModel):
     property: Optional[TextProperty]
     boundingBox: Any
-    paragraphs: list[Paragraph]
     blockType: BlockType
     confidence: float
@@ -73,12 +73,12 @@ class Page(BaseModel):
     property: Optional[TextProperty]
     width: int
     height: int
-    blocks: list[Block]
     confidence: float
 class TextAnnotation(BaseModel):
-    pages: list[Page]
     text: str

 Custom types for dealing with the Google Vision API JSON output.
 """
 from enum import IntEnum
+from typing import Any, Optional, List
 from pydantic import BaseModel
 class TextProperty(BaseModel):
+    detectedLanguages: List[DetectedLanguage]
     detectedBreak: Optional[DetectedBreak]
 class Word(BaseModel):
     property: Optional[TextProperty]
     boundingBox: Any
+    symbols: List[Symbol]
     confidence: float
 class Paragraph(BaseModel):
     property: Optional[TextProperty]
     boundingBox: Any
+    words: List[Word]
     confidence: float
 class Block(BaseModel):
     property: Optional[TextProperty]
     boundingBox: Any
+    paragraphs: List[Paragraph]
     blockType: BlockType
     confidence: float
     property: Optional[TextProperty]
     width: int
     height: int
+    blocks: List[Block]
     confidence: float
 class TextAnnotation(BaseModel):
+    pages: List[Page]
     text: str

data_models.py CHANGED Viewed

@@ -4,7 +4,7 @@ Pydantic models used throughout the codebase.
 In particular, these are the types that are used as input and output of each step of the pipeline.
 """
 import json
-from typing import Any, Optional, Union
 from pydantic import BaseModel, Field
@@ -65,7 +65,7 @@ class NutrientTable(BaseModel):
     nutrientBasisQuantityValue: Optional[str]
     nutrientBasisQuantityMeasurementUnitCode: Optional[str]
     preperationStateCode: Optional[str]
-    values: list[NutrientTableElement]
     def __str__(self):
         top = "Nutrients per " + " ".join(
@@ -83,7 +83,7 @@ class Attribute(BaseModel):
     coordinates: str
     entity: str
     probability: float
-    value: Union[str, list[NutrientTable]]
     model: str
@@ -116,13 +116,13 @@ class NetContentAttribute(BaseModel):
 class AllergensOut(BaseModel):
     entity: str
-    values: list[AttributeAllergen]
     model: str
 class CommunicationChannelsOut(BaseModel):
     entity: str
-    values: list[AttributeCommunicationChannel]
     model: str
@@ -131,7 +131,7 @@ class PipelineInput(BaseModel):
 class PipelineOutput(BaseModel):
-    attributes: list[
         Union[Attribute, CommunicationChannelsOut, AllergensOut, NetContentAttribute]
     ]
     job_id: str = Field(alias="job-id")
@@ -147,13 +147,13 @@ class TextWithLanguage(BaseModel):
 class OCRTextOut(BaseModel):
-    blocks: list[str]
     full_text: str
-    sentences: list[TextWithLanguage]
 class OCRTableOut(BaseModel):
-    tables: list[list[list[str]]]
 class OCROut(BaseModel):
@@ -162,7 +162,7 @@ class OCROut(BaseModel):
 class OCROutList(BaseModel):
-    __root__: list[OCROut]
     def __iter__(self):
         return iter(self.__root__)
@@ -172,11 +172,11 @@ class OCROutList(BaseModel):
 class OCRWrapperOut(BaseModel):
-    blocks: list[str]
     full_text: str
     job_id: str
-    sentences: list[TextWithLanguage]
-    tables: list[list[list[str]]]
 class ClassifiedText(BaseModel):
@@ -210,8 +210,8 @@ class NetContent(BaseModel):
 class ModelOut(BaseModel):
-    blocks: list[Union[NetContent, Allergen, CommunicationChannels, ClassifiedText]]
-    tables: Optional[list[NutrientTable]]
     job_id: str
     model: str
     full_text: str
@@ -221,7 +221,7 @@ class ModelOut(BaseModel):
 class ModelOutList(BaseModel):
-    __root__: list[ModelOut]
     def __iter__(self):
         return iter(self.__root__)
@@ -231,6 +231,6 @@ class ModelOutList(BaseModel):
 class TrainModelOut(BaseModel):
-    # To be defined later when we have a list of accepted formats
     model: Optional[Any] = None
     artifacts: Optional[Any] = None

 In particular, these are the types that are used as input and output of each step of the pipeline.
 """
 import json
+from typing import Any, Optional, Union, List
 from pydantic import BaseModel, Field
     nutrientBasisQuantityValue: Optional[str]
     nutrientBasisQuantityMeasurementUnitCode: Optional[str]
     preperationStateCode: Optional[str]
+    values: List[NutrientTableElement]
     def __str__(self):
         top = "Nutrients per " + " ".join(
     coordinates: str
     entity: str
     probability: float
+    value: Union[str, List[NutrientTable]]
     model: str
 class AllergensOut(BaseModel):
     entity: str
+    values: List[AttributeAllergen]
     model: str
 class CommunicationChannelsOut(BaseModel):
     entity: str
+    values: List[AttributeCommunicationChannel]
     model: str
 class PipelineOutput(BaseModel):
+    attributes: List[
         Union[Attribute, CommunicationChannelsOut, AllergensOut, NetContentAttribute]
     ]
     job_id: str = Field(alias="job-id")
 class OCRTextOut(BaseModel):
+    blocks: List[str]
     full_text: str
+    sentences: List[TextWithLanguage]
 class OCRTableOut(BaseModel):
+    tables: List[List[List[str]]]
 class OCROut(BaseModel):
 class OCROutList(BaseModel):
+    __root__: List[OCROut]
     def __iter__(self):
         return iter(self.__root__)
 class OCRWrapperOut(BaseModel):
+    blocks: List[str]
     full_text: str
     job_id: str
+    sentences: List[TextWithLanguage]
+    tables: List[List[List[str]]]
 class ClassifiedText(BaseModel):
 class ModelOut(BaseModel):
+    blocks: List[Union[NetContent, Allergen, CommunicationChannels, ClassifiedText]]
+    tables: Optional[List[NutrientTable]]
     job_id: str
     model: str
     full_text: str
 class ModelOutList(BaseModel):
+    __root__: List[ModelOut]
     def __iter__(self):
         return iter(self.__root__)
 class TrainModelOut(BaseModel):
+    # To be defined later when we have a List of accepted formats
     model: Optional[Any] = None
     artifacts: Optional[Any] = None