Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -43,22 +43,6 @@ def image_to_base64(image):
|
|
| 43 |
|
| 44 |
@spaces.GPU()
|
| 45 |
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
|
| 46 |
-
"""
|
| 47 |
-
Process an input image using the OCR model based on the specified task.
|
| 48 |
-
|
| 49 |
-
Args:
|
| 50 |
-
image (Union[dict, np.ndarray, str, PIL.Image]): Input image in various formats
|
| 51 |
-
task (str): Type of OCR task to perform
|
| 52 |
-
ocr_type (str, optional): Type of OCR processing ('ocr' or 'format')
|
| 53 |
-
ocr_box (str, optional): Bounding box coordinates for fine-grained OCR
|
| 54 |
-
ocr_color (str, optional): Color specification for fine-grained OCR
|
| 55 |
-
|
| 56 |
-
Returns:
|
| 57 |
-
tuple: (result_text, html_content, unique_id)
|
| 58 |
-
- result_text (str): OCR processing result or error message
|
| 59 |
-
- html_content (str): HTML content for visualization if applicable
|
| 60 |
-
- unique_id (str): Unique identifier for the processed image
|
| 61 |
-
"""
|
| 62 |
if image is None:
|
| 63 |
return "Error: No image provided", None, None
|
| 64 |
|
|
@@ -191,6 +175,39 @@ def parse_latex_output(res):
|
|
| 191 |
|
| 192 |
|
| 193 |
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
|
| 195 |
|
| 196 |
if isinstance(res, str) and res.startswith("Error:"):
|
|
|
|
| 43 |
|
| 44 |
@spaces.GPU()
|
| 45 |
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
if image is None:
|
| 47 |
return "Error: No image provided", None, None
|
| 48 |
|
|
|
|
| 175 |
|
| 176 |
|
| 177 |
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
| 178 |
+
"""
|
| 179 |
+
Main OCR demonstration function that processes images and returns results.
|
| 180 |
+
|
| 181 |
+
Args:
|
| 182 |
+
image (Union[dict, np.ndarray, str, PIL.Image]): Input image in one of these formats:
|
| 183 |
+
- dict: Image component state with keys:
|
| 184 |
+
- path: str | None (Path to local file)
|
| 185 |
+
- url: str | None (Public URL or base64-encoded image)
|
| 186 |
+
- size: int | None (Image size in bytes)
|
| 187 |
+
- orig_name: str | None (Original filename)
|
| 188 |
+
- mime_type: str | None (Image MIME type)
|
| 189 |
+
- is_stream: bool (Always False)
|
| 190 |
+
- meta: dict[str, Any]
|
| 191 |
+
- dict: ImageEditor component state with keys:
|
| 192 |
+
- background: filepath | None
|
| 193 |
+
- layers: list[filepath]
|
| 194 |
+
- composite: filepath | None
|
| 195 |
+
- id: str | None
|
| 196 |
+
- np.ndarray: Raw image array
|
| 197 |
+
- str: Path to image file
|
| 198 |
+
- PIL.Image: PIL Image object
|
| 199 |
+
task (Literal['Plain Text OCR', 'Format Text OCR', 'Fine-grained OCR (Box)',
|
| 200 |
+
'Fine-grained OCR (Color)', 'Multi-crop OCR', 'Render Formatted OCR']):
|
| 201 |
+
Selected OCR task type
|
| 202 |
+
ocr_type (Literal['ocr', 'format']): Type of OCR processing
|
| 203 |
+
ocr_box (str): Bounding box coordinates in format "x1,y1,x2,y2"
|
| 204 |
+
ocr_color (Literal['red', 'green', 'blue']): Color specification for fine-grained OCR
|
| 205 |
+
|
| 206 |
+
Returns:
|
| 207 |
+
tuple: (formatted_result, html_output)
|
| 208 |
+
- formatted_result (str): Formatted OCR result text
|
| 209 |
+
- html_output (str): HTML visualization if applicable
|
| 210 |
+
"""
|
| 211 |
res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
|
| 212 |
|
| 213 |
if isinstance(res, str) and res.startswith("Error:"):
|