fix: tolerate non-numeric paddleocr boxes
Browse files
app.py
CHANGED
|
@@ -205,7 +205,11 @@ def ocr_and_parse(image: Image.Image) -> Dict[str, Any]:
|
|
| 205 |
if not cleaned:
|
| 206 |
continue
|
| 207 |
lines.append(cleaned)
|
| 208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
segments.append({
|
| 210 |
"text": cleaned,
|
| 211 |
"confidence": float(confidence),
|
|
|
|
| 205 |
if not cleaned:
|
| 206 |
continue
|
| 207 |
lines.append(cleaned)
|
| 208 |
+
try:
|
| 209 |
+
box_arr = np.asarray(bbox, dtype=float)
|
| 210 |
+
box_serializable = box_arr.tolist()
|
| 211 |
+
except (TypeError, ValueError):
|
| 212 |
+
box_serializable = None
|
| 213 |
segments.append({
|
| 214 |
"text": cleaned,
|
| 215 |
"confidence": float(confidence),
|