fix: handle paddleocr output variants
Browse files
app.py
CHANGED
|
@@ -178,8 +178,26 @@ def ocr_and_parse(image: Image.Image) -> Dict[str, Any]:
|
|
| 178 |
for result in ocr_results:
|
| 179 |
if not result:
|
| 180 |
continue
|
| 181 |
-
for
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
if not cleaned:
|
| 184 |
continue
|
| 185 |
lines.append(cleaned)
|
|
|
|
| 178 |
for result in ocr_results:
|
| 179 |
if not result:
|
| 180 |
continue
|
| 181 |
+
for entry in result:
|
| 182 |
+
if not entry:
|
| 183 |
+
continue
|
| 184 |
+
bbox = entry[0]
|
| 185 |
+
text = ""
|
| 186 |
+
confidence = 1.0
|
| 187 |
+
if len(entry) == 2:
|
| 188 |
+
text_info = entry[1]
|
| 189 |
+
if isinstance(text_info, (list, tuple)) and text_info:
|
| 190 |
+
text = text_info[0] or ""
|
| 191 |
+
if len(text_info) > 1 and text_info[1] is not None:
|
| 192 |
+
confidence = float(text_info[1])
|
| 193 |
+
else:
|
| 194 |
+
text = str(text_info)
|
| 195 |
+
elif len(entry) >= 3:
|
| 196 |
+
text = entry[1] or ""
|
| 197 |
+
if entry[2] is not None:
|
| 198 |
+
confidence = float(entry[2])
|
| 199 |
+
|
| 200 |
+
cleaned = text.strip()
|
| 201 |
if not cleaned:
|
| 202 |
continue
|
| 203 |
lines.append(cleaned)
|