Vlad Bastina committed on
Commit
a2db978
·
1 Parent(s): 5c26c18
Files changed (2) hide show
  1. app.py +59 -3
  2. tmp/extracted_frame.png +2 -2
app.py CHANGED
@@ -13,6 +13,64 @@ from utils import (
13
  overlay_image_top_left,
14
  annotate_image_with_phrase_and_label
15
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  # Set your API key
18
  os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY") if os.getenv("GOOGLE_API_KEY") else st.secrets["GOOGLE_API_KEY"]
@@ -42,9 +100,7 @@ if st.button("Generate Thumbnail"):
42
  extract_frame(video_path, timestamp, image_path)
43
 
44
  # BOUNDING BOX
45
- response = extract_bounding_box(image_path, description)
46
- bbox_match = re.search(r'\((\d+),\s*(\d+),\s*(\d+),\s*(\d+)\)', response)
47
- bbox = tuple(map(int, bbox_match.groups())) if bbox_match else None
48
 
49
  center_bbox_in_circle(image_path, bbox, image_path)
50
  overlay_image_bottom_right(image_path, "assets/corner.png", image_path)
 
13
  overlay_image_top_left,
14
  annotate_image_with_phrase_and_label
15
  )
16
+ import numpy as np
17
+
18
+
19
def compute_iou(boxA, boxB):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Each box is a 4-item sequence ``(x1, y1, x2, y2)`` holding two opposite
    corners. The corners may be given in either order; they are normalized
    before the overlap is computed so that a box written "backwards" is
    still compared as the rectangle it describes.

    Args:
        boxA: First box as ``(x1, y1, x2, y2)``.
        boxB: Second box as ``(x1, y1, x2, y2)``.

    Returns:
        A float: 0.0 when the boxes do not overlap (including boxes that
        only touch along an edge or a corner), otherwise the overlap area
        divided by the area of the union.
    """
    # Normalize each axis so the smaller coordinate always comes first.
    a_left, a_right = sorted((boxA[0], boxA[2]))
    a_top, a_bottom = sorted((boxA[1], boxA[3]))
    b_left, b_right = sorted((boxB[0], boxB[2]))
    b_top, b_bottom = sorted((boxB[1], boxB[3]))

    inter_width = min(a_right, b_right) - max(a_left, b_left)
    inter_height = min(a_bottom, b_bottom) - max(a_top, b_top)
    if inter_width <= 0 or inter_height <= 0:
        return 0.0

    inter_area = inter_width * inter_height
    area_a = (a_right - a_left) * (a_bottom - a_top)
    area_b = (b_right - b_left) * (b_bottom - b_top)

    # A positive intersection implies both areas are positive, so the union
    # below is strictly positive and the division is safe.
    return inter_area / float(area_a + area_b - inter_area)
34
+
35
def extract_voted_bounding_box(image_path, description, num_attempts=5, iou_threshold=0.5, min_votes=3):
    """Query for a bounding box several times and return a consensus box.

    ``extract_bounding_box`` is called ``num_attempts`` times. Every response
    that contains a ``(a, b, c, d)`` group of four non-negative integers
    contributes one candidate box. Candidates are then clustered greedily:
    each not-yet-assigned box seeds a group and absorbs every later
    unassigned box whose IoU with the seed exceeds ``iou_threshold``.

    Args:
        image_path: Passed through unchanged to ``extract_bounding_box``.
        description: Passed through unchanged to ``extract_bounding_box``.
        num_attempts: Number of times ``extract_bounding_box`` is called.
        iou_threshold: IoU a box must exceed (strictly) against a group's
            seed box to join that group.
        min_votes: Minimum number of boxes a group needs to count as a
            consensus. Defaults to 3, the previously hard-coded value.

    Returns:
        ``None`` if no response yielded a box. Otherwise a 4-tuple of
        Python ints: the element-wise mean (truncated to int) of the
        largest qualifying group, or the first parsed box when no group
        reaches ``min_votes``.
    """
    # Compile once instead of re-resolving the pattern on every attempt.
    bbox_pattern = re.compile(r'\((\d+),\s*(\d+),\s*(\d+),\s*(\d+)\)')

    # Collect one candidate box per parseable response.
    bboxes = []
    for _ in range(num_attempts):
        response = extract_bounding_box(image_path, description)
        if not isinstance(response, str):
            # A non-text response cannot be parsed; treat it like a
            # response without a box rather than aborting the whole vote.
            continue
        match = bbox_pattern.search(response)
        if match:
            bboxes.append(tuple(map(int, match.groups())))

    if not bboxes:
        return None

    # Group boxes by IoU against each group's seed box.
    groups = []
    used = set()
    for i, seed in enumerate(bboxes):
        if i in used:
            continue
        used.add(i)
        group = [seed]
        for j in range(i + 1, len(bboxes)):
            if j in used:
                continue
            if compute_iou(seed, bboxes[j]) > iou_threshold:
                group.append(bboxes[j])
                # Members stay claimed even if this group is later dropped
                # for being too small.
                used.add(j)
        if len(group) >= min_votes:
            groups.append(group)

    if not groups:
        return bboxes[0]  # Fallback to the first bounding box if no groups found

    # Use the group with the most members (the earliest one wins ties).
    best_group = max(groups, key=len)
    voted_bbox = np.mean(np.array(best_group), axis=0).astype(int)
    # .tolist() yields plain Python ints, matching the type returned by the
    # fallback path above.
    return tuple(voted_bbox.tolist())
74
 
75
  # Set your API key
76
  os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY") if os.getenv("GOOGLE_API_KEY") else st.secrets["GOOGLE_API_KEY"]
 
100
  extract_frame(video_path, timestamp, image_path)
101
 
102
  # BOUNDING BOX
103
+ bbox = extract_voted_bounding_box(image_path, description)
 
 
104
 
105
  center_bbox_in_circle(image_path, bbox, image_path)
106
  overlay_image_bottom_right(image_path, "assets/corner.png", image_path)
tmp/extracted_frame.png CHANGED

Git LFS Details

  • SHA256: ebaa7092af7d5f1601241b30ea50efabc0beb52df8c8f9839913ac635382761c
  • Pointer size: 131 Bytes
  • Size of remote file: 130 kB

Git LFS Details

  • SHA256: 2f672e5328b1f86fc8aa8f1e813f0b6f56acf2bdd6b2dc58d22df9d70c1939c9
  • Pointer size: 131 Bytes
  • Size of remote file: 129 kB