Spaces:
Runtime error
Runtime error
Tonic
committed on
add quad boxes
Browse files
app.py
CHANGED
|
@@ -112,14 +112,24 @@ def fig_to_pil(fig):
|
|
| 112 |
buf.seek(0)
|
| 113 |
return Image.open(buf)
|
| 114 |
|
| 115 |
-
def plot_bbox(image, data):
|
| 116 |
fig, ax = plt.subplots()
|
| 117 |
ax.imshow(image)
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
ax.axis('off')
|
| 124 |
return fig
|
| 125 |
|
|
@@ -173,8 +183,9 @@ def main_process(image, task):
|
|
| 173 |
|
| 174 |
if task in IMAGE_TASKS:
|
| 175 |
if task == "OCR with Region":
|
| 176 |
-
|
| 177 |
-
|
|
|
|
| 178 |
|
| 179 |
# Debugging: Print the recognized text
|
| 180 |
print(f"Recognized Text: {text_output}")
|
|
|
|
| 112 |
buf.seek(0)
|
| 113 |
return Image.open(buf)
|
| 114 |
|
| 115 |
+
def plot_bbox(image, data, use_quad_boxes=False):
    """Draw labelled detection regions on top of ``image`` and return the figure.

    Args:
        image: Image to display; passed straight to ``ax.imshow``.
        data: Mapping holding a ``'labels'`` sequence plus either
            ``'quad_boxes'`` (each entry a flat coordinate sequence that is
            reshaped into ``(x, y)`` vertex pairs) or ``'bboxes'`` (each entry
            ``x1, y1, x2, y2``).
        use_quad_boxes: When true, draw polygons from ``data['quad_boxes']``;
            otherwise draw rectangles from ``data['bboxes']``.

    Returns:
        The Matplotlib figure containing the image and its overlays. The
        figure is not closed here.

    Raises:
        KeyError: If the key required by the selected mode (or ``'labels'``)
            is missing from ``data``.
    """
    fig, ax = plt.subplots()
    ax.imshow(image)

    # Styling shared by every label, built once instead of per iteration.
    label_style = dict(
        color='white',
        fontsize=8,
        bbox=dict(facecolor='red', alpha=0.5),
    )

    # Handle both 'bboxes' and 'quad_boxes'.
    # zip() stops at the shorter sequence, so surplus boxes or labels are
    # ignored rather than raising.
    if use_quad_boxes:
        for quad_box, label in zip(data['quad_boxes'], data['labels']):
            vertices = np.array(quad_box).reshape(-1, 2)
            poly = patches.Polygon(
                vertices, linewidth=1, edgecolor='r', facecolor='none'
            )
            ax.add_patch(poly)
            # Draw on this function's own axes: plt.text() targets pyplot's
            # implicit "current axes", which may belong to another figure.
            ax.text(vertices[0][0], vertices[0][1], label, **label_style)
    else:
        for bbox, label in zip(data['bboxes'], data['labels']):
            x1, y1, x2, y2 = bbox
            rect = patches.Rectangle(
                (x1, y1), x2 - x1, y2 - y1,
                linewidth=1, edgecolor='r', facecolor='none',
            )
            ax.add_patch(rect)
            ax.text(x1, y1, label, **label_style)

    ax.axis('off')
    return fig
|
| 135 |
|
|
|
|
| 183 |
|
| 184 |
if task in IMAGE_TASKS:
|
| 185 |
if task == "OCR with Region":
|
| 186 |
+
fig = plot_bbox(image, result['<OCR_WITH_REGION>'], use_quad_boxes=True)
|
| 187 |
+
output_image = fig_to_pil(fig)
|
| 188 |
+
text_output = result.get('<OCR_WITH_REGION>', {}).get('recognized_text', 'No text found')
|
| 189 |
|
| 190 |
# Debugging: Print the recognized text
|
| 191 |
print(f"Recognized Text: {text_output}")
|