Spaces:
Running
Running
Luis J Camargo
committed on
Commit
·
d125128
1
Parent(s):
cb29984
demo images and output text
Browse files
app.py
CHANGED
|
@@ -93,35 +93,25 @@ def inference(img):
|
|
| 93 |
if not result or len(result) == 0:
|
| 94 |
return "No text detected in the image."
|
| 95 |
|
| 96 |
-
#
|
| 97 |
-
|
| 98 |
-
print(f"Result content: {result}")
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
for item in result:
|
| 108 |
-
if isinstance(item, dict):
|
| 109 |
-
# If result is a dict with 'text' and 'confidence'
|
| 110 |
-
text = item.get('text', str(item))
|
| 111 |
-
confidence = item.get('confidence', 1.0)
|
| 112 |
-
output_lines.append(f"| {text} | {confidence:.2%} |")
|
| 113 |
-
elif isinstance(item, (list, tuple)) and len(item) >= 2:
|
| 114 |
-
# If result is like [(bbox, (text, confidence)), ...]
|
| 115 |
-
text = item[1][0] if isinstance(item[1], (list, tuple)) else str(item[1])
|
| 116 |
-
confidence = item[1][1] if isinstance(item[1], (list, tuple)) and len(item[1]) > 1 else 1.0
|
| 117 |
-
output_lines.append(f"| {text} | {confidence:.2%} |")
|
| 118 |
-
else:
|
| 119 |
-
# Fallback: just show the item
|
| 120 |
-
output_lines.append(f"| {str(item)} | N/A |")
|
| 121 |
-
else:
|
| 122 |
-
output_lines.append(f"| {str(result)} | N/A |")
|
| 123 |
|
| 124 |
-
|
|
|
|
| 125 |
|
| 126 |
except Exception as e:
|
| 127 |
import traceback
|
|
@@ -145,18 +135,23 @@ detect and recognize the text.
|
|
| 145 |
'''
|
| 146 |
|
| 147 |
examples = [
|
| 148 |
-
['
|
| 149 |
-
['
|
| 150 |
-
['
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
]
|
| 152 |
|
| 153 |
example_labels = """
|
| 154 |
### Example Images:
|
| 155 |
| Image | Language | Description |
|
| 156 |
|-------|----------|-------------|
|
| 157 |
-
|
|
| 158 |
-
|
|
| 159 |
-
|
|
| 160 |
"""
|
| 161 |
|
| 162 |
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;} .output_markdown {min-height: 30rem !important;}"
|
|
|
|
| 93 |
if not result or len(result) == 0:
|
| 94 |
return "No text detected in the image."
|
| 95 |
|
| 96 |
+
# Extract only the text content from PaddleOCRVL result
|
| 97 |
+
extracted_texts = []
|
|
|
|
| 98 |
|
| 99 |
+
for item in result:
|
| 100 |
+
if isinstance(item, dict):
|
| 101 |
+
# Look for 'layout_parsing_res' which contains the actual text blocks
|
| 102 |
+
if 'layout_parsing_res' in item:
|
| 103 |
+
for block in item['layout_parsing_res']:
|
| 104 |
+
if 'content' in block:
|
| 105 |
+
extracted_texts.append(block['content'])
|
| 106 |
+
# Fallback: look for 'content' directly
|
| 107 |
+
elif 'content' in item:
|
| 108 |
+
extracted_texts.append(item['content'])
|
| 109 |
|
| 110 |
+
if not extracted_texts:
|
| 111 |
+
return "No text could be extracted from the image."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
+
# Join all text blocks with double newlines
|
| 114 |
+
return "\n\n".join(extracted_texts)
|
| 115 |
|
| 116 |
except Exception as e:
|
| 117 |
import traceback
|
|
|
|
| 135 |
'''
|
| 136 |
|
| 137 |
examples = [
|
| 138 |
+
['cco.jpg'],
|
| 139 |
+
['cnt.jpg'],
|
| 140 |
+
['cuc.jpg'],
|
| 141 |
+
['maj.jpg'],
|
| 142 |
+
['mir.jpg'],
|
| 143 |
+
['ote.jpg'],
|
| 144 |
+
['otm.jpg'],
|
| 145 |
+
['tku.jpg'],
|
| 146 |
]
|
| 147 |
|
| 148 |
example_labels = """
|
| 149 |
### Example Images:
|
| 150 |
| Image | Language | Description |
|
| 151 |
|-------|----------|-------------|
|
| 152 |
+
| cco.jpg | Comaltepec Chinantec | Classical Nahuatl text with traditional glyphs |
|
| 153 |
+
| cnt.jpg | Tepetotutla Chinantec | Contemporary Maya writing with diacritics |
|
| 154 |
+
| cuc.jpg | Usila Chinantec | Zapotec text from Oaxaca region |
|
| 155 |
"""
|
| 156 |
|
| 157 |
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;} .output_markdown {min-height: 30rem !important;}"
|
cco.jpg
ADDED
|
cnt.jpg
ADDED
|
cuc.jpg
ADDED
|
maj.jpg
ADDED
|
mir.jpg
ADDED
|
ote.jpg
ADDED
|
otm.jpg
ADDED
|
tku.jpg
ADDED
|