Spaces:
Sleeping
Sleeping
Vlad Bastina
committed on
Commit
·
a2db978
1
Parent(s):
5c26c18
update
Browse files- app.py +59 -3
- tmp/extracted_frame.png +2 -2
app.py
CHANGED
|
@@ -13,6 +13,64 @@ from utils import (
|
|
| 13 |
overlay_image_top_left,
|
| 14 |
annotate_image_with_phrase_and_label
|
| 15 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Set your API key
|
| 18 |
# Prefer the value already in the process environment; otherwise fall back to st.secrets.
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY") or st.secrets["GOOGLE_API_KEY"]
|
|
@@ -42,9 +100,7 @@ if st.button("Generate Thumbnail"):
|
|
| 42 |
extract_frame(video_path, timestamp, image_path)
|
| 43 |
|
| 44 |
# BOUNDING BOX
|
| 45 |
-
|
| 46 |
-
bbox_match = re.search(r'\((\d+),\s*(\d+),\s*(\d+),\s*(\d+)\)', response)
|
| 47 |
-
bbox = tuple(map(int, bbox_match.groups())) if bbox_match else None
|
| 48 |
|
| 49 |
center_bbox_in_circle(image_path, bbox, image_path)
|
| 50 |
overlay_image_bottom_right(image_path, "assets/corner.png", image_path)
|
|
|
|
| 13 |
overlay_image_top_left,
|
| 14 |
annotate_image_with_phrase_and_label
|
| 15 |
)
|
| 16 |
+
import numpy as np
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def compute_iou(boxA, boxB):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Each box is indexed as ``(x1, y1, x2, y2)``: positions 0/1 hold the lower
    corner and positions 2/3 hold the upper corner.

    Returns:
        ``0.0`` when the boxes share no area (including boxes that merely
        touch), otherwise the overlap area divided by the union area.
    """
    # Corners of the overlap rectangle.
    x_lo = max(boxA[0], boxB[0])
    y_lo = max(boxA[1], boxB[1])
    x_hi = min(boxA[2], boxB[2])
    y_hi = min(boxA[3], boxB[3])

    # Clamp each side at zero so disjoint boxes yield an empty overlap.
    overlap_w = max(0, x_hi - x_lo)
    overlap_h = max(0, y_hi - y_lo)
    overlap = overlap_w * overlap_h
    if overlap == 0:
        return 0.0

    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])

    # The overlap is counted in both areas, so subtract it once for the union.
    union = float(area_a + area_b - overlap)
    return overlap / union
|
| 34 |
+
|
| 35 |
+
def extract_voted_bounding_box(image_path, description, num_attempts=5, iou_threshold=0.5):
    """Pick a bounding box by voting over repeated extraction attempts.

    ``extract_bounding_box(image_path, description)`` is called
    ``num_attempts`` times. Every response that contains a parenthesised
    group of four integers, e.g. ``(10, 20, 110, 220)``, contributes one
    candidate box. Candidates are then grouped by IoU and the largest group
    with at least three members is averaged coordinate-wise.

    Args:
        image_path: Passed through unchanged to ``extract_bounding_box``.
        description: Passed through unchanged to ``extract_bounding_box``.
        num_attempts: Number of extraction calls to make.
        iou_threshold: A candidate joins a group only when its IoU with the
            group's seed box is strictly greater than this value.

    Returns:
        ``None`` if no response contained a box; otherwise a 4-tuple of
        Python ``int``. When no group reaches three members the first
        candidate is returned as-is.
    """
    # Compile once; the same pattern is applied to every response.
    bbox_pattern = re.compile(r'\((\d+),\s*(\d+),\s*(\d+),\s*(\d+)\)')

    # Collect one candidate per response that contains a box.
    bboxes = []
    for _ in range(num_attempts):
        response = extract_bounding_box(image_path, description)
        match = bbox_pattern.search(response)
        if match:
            bboxes.append(tuple(map(int, match.groups())))

    if not bboxes:
        return None

    # Greedy grouping: each not-yet-assigned box seeds a group, and later
    # boxes are compared against that seed only. A box belongs to at most
    # one group, even if that group ends up too small to count.
    groups = []
    used = set()
    for i, seed in enumerate(bboxes):
        if i in used:
            continue
        used.add(i)
        group = [seed]
        for j in range(i + 1, len(bboxes)):
            if j in used:
                continue
            if compute_iou(seed, bboxes[j]) > iou_threshold:
                group.append(bboxes[j])
                used.add(j)
        # Only groups with a real majority-style agreement are kept.
        if len(group) >= 3:
            groups.append(group)

    if not groups:
        return bboxes[0]  # Fallback to the first bounding box if no groups found

    # Use the group with the most members (first one wins on ties).
    best_group = max(groups, key=len)
    voted_bbox = np.mean(np.array(best_group), axis=0).astype(int)

    # Convert NumPy scalars to plain ints so both return paths yield the
    # same element type (the fallback path already returns Python ints).
    return tuple(int(coord) for coord in voted_bbox)
|
| 74 |
|
| 75 |
# Set your API key
|
| 76 |
# Prefer the value already in the process environment; otherwise fall back to st.secrets.
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY") or st.secrets["GOOGLE_API_KEY"]
|
|
|
|
| 100 |
extract_frame(video_path, timestamp, image_path)
|
| 101 |
|
| 102 |
# BOUNDING BOX
|
| 103 |
+
bbox = extract_voted_bounding_box(image_path, description)
|
|
|
|
|
|
|
| 104 |
|
| 105 |
center_bbox_in_circle(image_path, bbox, image_path)
|
| 106 |
overlay_image_bottom_right(image_path, "assets/corner.png", image_path)
|
tmp/extracted_frame.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|