Spaces:
Running
Running
Luis J Camargo
committed on
Commit
·
9c692ff
1
Parent(s):
f9963b2
warning and text display attempt
Browse files
app.py
CHANGED
|
@@ -109,14 +109,32 @@ def inference(img):
|
|
| 109 |
if not result or len(result) == 0:
|
| 110 |
return "No text detected in the image."
|
| 111 |
|
| 112 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
extracted_texts = []
|
| 114 |
|
| 115 |
-
for page in
|
| 116 |
-
if
|
| 117 |
-
for block in page
|
| 118 |
-
if
|
| 119 |
-
extracted_texts.append(block
|
| 120 |
|
| 121 |
if not extracted_texts:
|
| 122 |
return "No text could be extracted from the image."
|
|
@@ -142,6 +160,8 @@ the diverse character and glyph repertoire of Mexico's 68 indigenous languages.
|
|
| 142 |
**How to use:** Simply upload an image containing text in any Mexican indigenous language, and the model will
|
| 143 |
detect and recognize the text.
|
| 144 |
|
|
|
|
|
|
|
| 145 |
🔗 [PaddleOCR Documentation](https://github.com/PaddlePaddle/PaddleOCR)
|
| 146 |
'''
|
| 147 |
|
|
|
|
| 109 |
if not result or len(result) == 0:
|
| 110 |
return "No text detected in the image."
|
| 111 |
|
| 112 |
+
# Serialize to JSON first (this worked before)
|
| 113 |
+
import json
|
| 114 |
+
|
| 115 |
+
def serialize_for_json(obj):
|
| 116 |
+
"""Convert non-serializable objects to strings"""
|
| 117 |
+
if isinstance(obj, dict):
|
| 118 |
+
return {k: serialize_for_json(v) for k, v in obj.items()}
|
| 119 |
+
elif isinstance(obj, list):
|
| 120 |
+
return [serialize_for_json(item) for item in obj]
|
| 121 |
+
elif hasattr(obj, '__dict__'):
|
| 122 |
+
return serialize_for_json(obj.__dict__)
|
| 123 |
+
elif isinstance(obj, (str, int, float, bool, type(None))):
|
| 124 |
+
return obj
|
| 125 |
+
else:
|
| 126 |
+
return str(type(obj))
|
| 127 |
+
|
| 128 |
+
serialized_result = serialize_for_json(result)
|
| 129 |
+
|
| 130 |
+
# Now extract text from the serialized structure
|
| 131 |
extracted_texts = []
|
| 132 |
|
| 133 |
+
for page in serialized_result:
|
| 134 |
+
if isinstance(page, dict) and 'parsing_res_list' in page:
|
| 135 |
+
for block in page['parsing_res_list']:
|
| 136 |
+
if isinstance(block, dict) and 'content' in block and block['content']:
|
| 137 |
+
extracted_texts.append(block['content'])
|
| 138 |
|
| 139 |
if not extracted_texts:
|
| 140 |
return "No text could be extracted from the image."
|
|
|
|
| 160 |
**How to use:** Simply upload an image containing text in any Mexican indigenous language, and the model will
|
| 161 |
detect and recognize the text.
|
| 162 |
|
| 163 |
+
### Warning: as this free demonstrator space uses only CPU, a small image could take up to 5 minutes, so be patient.
|
| 164 |
+
|
| 165 |
🔗 [PaddleOCR Documentation](https://github.com/PaddlePaddle/PaddleOCR)
|
| 166 |
'''
|
| 167 |
|