Spaces:
Runtime error
Runtime error
Tonic committed on
add quad boxes
Browse files
app.py
CHANGED
|
@@ -9,14 +9,15 @@ from modeling_florence2 import Florence2ForConditionalGeneration
|
|
| 9 |
import io
|
| 10 |
import matplotlib.pyplot as plt
|
| 11 |
import matplotlib.patches as patches
|
|
|
|
| 12 |
import numpy as np
|
| 13 |
import random
|
| 14 |
import json
|
| 15 |
-
|
|
|
|
| 16 |
with open("config.json", "r") as f:
|
| 17 |
config = json.load(f)
|
| 18 |
|
| 19 |
-
# Extract necessary variables from the config
|
| 20 |
d_model = config['text_config']['d_model']
|
| 21 |
num_layers = config['text_config']['encoder_layers']
|
| 22 |
attention_heads = config['text_config']['encoder_attention_heads']
|
|
@@ -29,12 +30,10 @@ no_repeat_ngram_size = config['text_config']['no_repeat_ngram_size']
|
|
| 29 |
patch_size = config['vision_config']['patch_size'][0]
|
| 30 |
temporal_embeddings = config['vision_config']['visual_temporal_embedding']['max_temporal_embeddings']
|
| 31 |
|
| 32 |
-
|
| 33 |
title = """# 🙋🏻♂️Welcome to Tonic's PLeIAs/📸📈✍🏻Florence-PDF"""
|
| 34 |
description = """
|
| 35 |
---
|
| 36 |
|
| 37 |
-
|
| 38 |
This application showcases the **PLeIAs/📸📈✍🏻Florence-PDF** model, a powerful AI system designed for both **text and image generation tasks**. The model is capable of handling complex tasks such as object detection, image captioning, OCR (Optical Character Recognition), and detailed region-based image analysis.
|
| 39 |
|
| 40 |
### **How to Use**:
|
|
@@ -119,7 +118,6 @@ def plot_bbox(image, data, use_quad_boxes=False):
|
|
| 119 |
fig, ax = plt.subplots()
|
| 120 |
ax.imshow(image)
|
| 121 |
|
| 122 |
-
# Handle both 'bboxes' and 'quad_boxes'
|
| 123 |
if use_quad_boxes:
|
| 124 |
for quad_box, label in zip(data.get('quad_boxes', []), data.get('labels', [])):
|
| 125 |
quad_box = np.array(quad_box).reshape(-1, 2)
|
|
@@ -156,19 +154,11 @@ def draw_ocr_bboxes(image, prediction):
|
|
| 156 |
def draw_bounding_boxes(image, quad_boxes, labels, color=(0, 255, 0), thickness=2):
|
| 157 |
"""
|
| 158 |
Draws quadrilateral bounding boxes on the image.
|
| 159 |
-
|
| 160 |
-
Args:
|
| 161 |
-
image: The original image where the bounding boxes will be drawn.
|
| 162 |
-
quad_boxes: List of quadrilateral bounding box points. Each bounding box contains four points.
|
| 163 |
-
labels: List of labels corresponding to each bounding box.
|
| 164 |
-
color: Color of the bounding box. Default is green.
|
| 165 |
-
thickness: Thickness of the bounding box lines. Default is 2.
|
| 166 |
"""
|
| 167 |
for i, quad in enumerate(quad_boxes):
|
| 168 |
points = np.array(quad, dtype=np.int32).reshape((-1, 1, 2)) # Reshape the quad points for drawing
|
| 169 |
image = cv2.polylines(image, [points], isClosed=True, color=color, thickness=thickness)
|
| 170 |
-
|
| 171 |
-
label_pos = (int(quad[0]), int(quad[1]) - 10) # Positioning label slightly above the bounding box
|
| 172 |
cv2.putText(image, labels[i], label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness)
|
| 173 |
|
| 174 |
return image
|
|
|
|
| 9 |
import io
|
| 10 |
import matplotlib.pyplot as plt
|
| 11 |
import matplotlib.patches as patches
|
| 12 |
+
from matplotlib.patches import Polygon
|
| 13 |
import numpy as np
|
| 14 |
import random
|
| 15 |
import json
|
| 16 |
+
|
| 17 |
+
|
| 18 |
with open("config.json", "r") as f:
|
| 19 |
config = json.load(f)
|
| 20 |
|
|
|
|
| 21 |
d_model = config['text_config']['d_model']
|
| 22 |
num_layers = config['text_config']['encoder_layers']
|
| 23 |
attention_heads = config['text_config']['encoder_attention_heads']
|
|
|
|
| 30 |
patch_size = config['vision_config']['patch_size'][0]
|
| 31 |
temporal_embeddings = config['vision_config']['visual_temporal_embedding']['max_temporal_embeddings']
|
| 32 |
|
|
|
|
| 33 |
title = """# 🙋🏻♂️Welcome to Tonic's PLeIAs/📸📈✍🏻Florence-PDF"""
|
| 34 |
description = """
|
| 35 |
---
|
| 36 |
|
|
|
|
| 37 |
This application showcases the **PLeIAs/📸📈✍🏻Florence-PDF** model, a powerful AI system designed for both **text and image generation tasks**. The model is capable of handling complex tasks such as object detection, image captioning, OCR (Optical Character Recognition), and detailed region-based image analysis.
|
| 38 |
|
| 39 |
### **How to Use**:
|
|
|
|
| 118 |
fig, ax = plt.subplots()
|
| 119 |
ax.imshow(image)
|
| 120 |
|
|
|
|
| 121 |
if use_quad_boxes:
|
| 122 |
for quad_box, label in zip(data.get('quad_boxes', []), data.get('labels', [])):
|
| 123 |
quad_box = np.array(quad_box).reshape(-1, 2)
|
|
|
|
| 154 |
def draw_bounding_boxes(image, quad_boxes, labels, color=(0, 255, 0), thickness=2):
|
| 155 |
"""
|
| 156 |
Draws quadrilateral bounding boxes on the image.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
"""
|
| 158 |
for i, quad in enumerate(quad_boxes):
|
| 159 |
points = np.array(quad, dtype=np.int32).reshape((-1, 1, 2)) # Reshape the quad points for drawing
|
| 160 |
image = cv2.polylines(image, [points], isClosed=True, color=color, thickness=thickness)
|
| 161 |
+
label_pos = (int(quad[0]), int(quad[1]) - 10)
|
|
|
|
| 162 |
cv2.putText(image, labels[i], label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness)
|
| 163 |
|
| 164 |
return image
|