Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,7 +16,6 @@ CLASS_NAMES = [
|
|
| 16 |
"card_oracle_text", "card_power_toughness"
|
| 17 |
]
|
| 18 |
|
| 19 |
-
# Define inference function
|
| 20 |
def segment_card(image):
|
| 21 |
image = np.array(image) # Convert PIL image to NumPy array
|
| 22 |
results = model(image) # Run YOLO inference
|
|
@@ -24,45 +23,57 @@ def segment_card(image):
|
|
| 24 |
# Convert to OpenCV format
|
| 25 |
annotated_image = image.copy()
|
| 26 |
|
|
|
|
|
|
|
|
|
|
| 27 |
# Extract bounding boxes and labels
|
| 28 |
for result in results:
|
| 29 |
for box in result.boxes:
|
| 30 |
x1, y1, x2, y2 = map(int, box.xyxy[0]) # Bounding box coordinates
|
| 31 |
class_id = int(box.cls[0]) # Class index
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
# Draw bounding box
|
| 35 |
-
cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
| 36 |
-
|
| 37 |
-
# Draw label text with background
|
| 38 |
-
font = cv2.FONT_HERSHEY_SIMPLEX
|
| 39 |
-
font_scale = 0.5
|
| 40 |
-
font_thickness = 2
|
| 41 |
-
text_size = cv2.getTextSize(label, font, font_scale, font_thickness)[0]
|
| 42 |
-
text_x, text_y = x1, y1 - 10
|
| 43 |
|
| 44 |
-
#
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
return Image.fromarray(annotated_image) # Convert back to PIL Image
|
| 68 |
|
|
@@ -76,4 +87,4 @@ iface = gr.Interface(
|
|
| 76 |
)
|
| 77 |
|
| 78 |
# Launch the app
|
| 79 |
-
iface.launch()
|
|
|
|
| 16 |
"card_oracle_text", "card_power_toughness"
|
| 17 |
]
|
| 18 |
|
|
|
|
| 19 |
def segment_card(image):
    """Detect card regions and draw one labelled box per detected class.

    The model may report several boxes for the same class; only the one with
    the highest confidence is kept. Each kept box is outlined in green and
    tagged with its ``CLASS_NAMES`` entry in black text on a filled green
    strip.

    Args:
        image: PIL image; it is converted to a NumPy array before inference.

    Returns:
        A PIL image created from the annotated copy of the input array.
    """
    pixels = np.array(image)
    detections = model(pixels)

    # Draw on a copy so the array that was passed to the model stays untouched.
    canvas = pixels.copy()

    # class index -> (confidence, (x1, y1, x2, y2)) of the strongest detection
    strongest = {}
    for detection in detections:
        for box in detection.boxes:
            left, top, right, bottom = map(int, box.xyxy[0])
            cls_idx = int(box.cls[0])
            score = float(box.conf[0])

            current = strongest.get(cls_idx)
            # Keep the stored entry unless this box is strictly more confident.
            if current is not None and not score > current[0]:
                continue
            strongest[cls_idx] = (score, (left, top, right, bottom))

    # Drawing parameters shared by every label.
    green = (0, 255, 0)
    black = (0, 0, 0)
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    font_thickness = 2

    for cls_idx, (_score, corners) in strongest.items():
        left, top, right, bottom = corners
        caption = CLASS_NAMES[cls_idx]

        # Outline of the detected region.
        cv2.rectangle(canvas, (left, top), (right, bottom), green, 2)

        caption_w, caption_h = cv2.getTextSize(
            caption, font, font_scale, font_thickness
        )[0]

        # Place the label above the box, but never higher than the top edge
        # of the image allows.
        anchor_x = left
        anchor_y = max(top - 10, caption_h + 10)

        # Filled strip behind the caption.
        cv2.rectangle(
            canvas,
            (anchor_x, anchor_y - caption_h - 5),
            (anchor_x + caption_w + 5, anchor_y + 5),
            green,
            -1,
        )

        # Caption text, black for contrast against the green strip.
        cv2.putText(
            canvas,
            caption,
            (anchor_x, anchor_y),
            font,
            font_scale,
            black,
            font_thickness,
        )

    return Image.fromarray(canvas)
|
| 79 |
|
|
|
|
| 87 |
)
|
| 88 |
|
| 89 |
# Launch the app
|
| 90 |
+
iface.launch()
|