Spaces:

Tonic
/

GOT-OCR

Running

App Files Community

Tonic committed on Jun 13

Commit

8bebde5

verified ·

1 Parent(s): a5d7d17

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -16

app.py CHANGED Viewed

@@ -43,22 +43,6 @@ def image_to_base64(image):
 @spaces.GPU()
 def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
-    """
-    Process an input image using the OCR model based on the specified task.
-    Args:
-        image (Union[dict, np.ndarray, str, PIL.Image]): Input image in various formats
-        task (str): Type of OCR task to perform
-        ocr_type (str, optional): Type of OCR processing ('ocr' or 'format')
-        ocr_box (str, optional): Bounding box coordinates for fine-grained OCR
-        ocr_color (str, optional): Color specification for fine-grained OCR
-    Returns:
-        tuple: (result_text, html_content, unique_id)
-            - result_text (str): OCR processing result or error message
-            - html_content (str): HTML content for visualization if applicable
-            - unique_id (str): Unique identifier for the processed image
-    """
     if image is None:
         return "Error: No image provided", None, None
@@ -191,6 +175,39 @@ def parse_latex_output(res):
 def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
     res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
     if isinstance(res, str) and res.startswith("Error:"):

 @spaces.GPU()
 def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
     if image is None:
         return "Error: No image provided", None, None
 def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
+    """
+    Main OCR demonstration function that processes images and returns results.
+    Args:
+        image (Union[dict, np.ndarray, str, PIL.Image]): Input image in one of these formats:
+            - dict: Image component state with keys:
+                - path: str | None (Path to local file)
+                - url: str | None (Public URL or base64 image)
+                - size: int | None (Image size in bytes)
+                - orig_name: str | None (Original filename)
+                - mime_type: str | None (Image MIME type)
+                - is_stream: bool (Always False)
+                - meta: dict(str, Any)
+            - dict: ImageEditor component state with keys:
+                - background: filepath | None
+                - layers: list[filepath]
+                - composite: filepath | None
+                - id: str | None
+            - np.ndarray: Raw image array
+            - str: Path to image file
+            - PIL.Image: PIL Image object
+        task (Literal['Plain Text OCR', 'Format Text OCR', 'Fine-grained OCR (Box)',
+              'Fine-grained OCR (Color)', 'Multi-crop OCR', 'Render Formatted OCR']):
+            Selected OCR task type
+        ocr_type (Literal['ocr', 'format']): Type of OCR processing
+        ocr_box (str): Bounding box coordinates in format "x1,y1,x2,y2"
+        ocr_color (Literal['red', 'green', 'blue']): Color specification for fine-grained OCR
+    Returns:
+        tuple: (formatted_result, html_output)
+            - formatted_result (str): Formatted OCR result text
+            - html_output (str): HTML visualization if applicable
+    """
     res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
     if isinstance(res, str) and res.startswith("Error:"):