Zhen Ye Claude Opus 4.6 committed on
Commit
a1ae44f
·
1 Parent(s): 6a99834

perf: defer GPU->CPU mask transfer to render thread via LazyFrameObjects

Browse files

Replace bulk segment_output.to_object_dicts() CPU transfer with lazy
materialization — only the last frame per segment is materialized for
IoU tracking continuity, while render frames use LazyFrameObjects that
defer GPU->CPU copy until the render thread needs them. Reduces CUDA
syncs from 100+ per segment to ~3. Also removes deployment_logs.txt.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (3) hide show
  1. deployment_logs.txt +0 -398
  2. inference.py +36 -31
  3. models/segmenters/grounded_sam2.py +141 -112
deployment_logs.txt DELETED
@@ -1,398 +0,0 @@
1
- data: {"data":"===== Application Startup at 2026-01-23 22:02:27 =====\n","timestamp":"2026-01-23T22:02:27Z"}
2
-
3
- data: {"data":"Set HF_HOME to /tmp/huggingface","timestamp":"2026-01-23T22:02:32.613Z"}
4
-
5
- data: {"data":"CUDA_VISIBLE_DEVICES not set. All GPUs should be visible.","timestamp":"2026-01-23T22:02:32.613Z"}
6
-
7
- data: {"data":"Startup Diagnostics: Torch version 2.9.1+cu128, CUDA available: True, Device count: 4","timestamp":"2026-01-23T22:02:34.118Z"}
8
-
9
- data: {"data":"WARNING ⚠️ user config directory '/home/user/.config/Ultralytics' is not writable, using '/tmp/Ultralytics'. Set YOLO_CONFIG_DIR to override.","timestamp":"2026-01-23T22:02:36.713Z"}
10
-
11
- data: {"data":"Creating new Ultralytics Settings v0.0.6 file ✅ ","timestamp":"2026-01-23T22:02:36.721Z"}
12
-
13
- data: {"data":"View Ultralytics Settings with 'yolo settings' or at '/tmp/Ultralytics/settings.json'","timestamp":"2026-01-23T22:02:36.721Z"}
14
-
15
- data: {"data":"Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.","timestamp":"2026-01-23T22:02:36.721Z"}
16
-
17
- data: {"data":"INFO: Started server process [1]","timestamp":"2026-01-23T22:02:36.837Z"}
18
-
19
- data: {"data":"INFO: Waiting for application startup.","timestamp":"2026-01-23T22:02:36.837Z"}
20
-
21
- data: {"data":"INFO: Application startup complete.","timestamp":"2026-01-23T22:02:36.837Z"}
22
-
23
- data: {"data":"INFO: Uvicorn running on http://0.0.0.0:7860 (Press CTRL+C to quit)","timestamp":"2026-01-23T22:02:36.837Z"}
24
-
25
- data: {"data":"INFO: 10.16.14.243:63556 - \"GET /?logs=container HTTP/1.1\" 307 Temporary Redirect","timestamp":"2026-01-23T22:02:38.715Z"}
26
-
27
- data: {"data":"INFO: 10.16.42.137:52271 - \"GET /?logs=container HTTP/1.1\" 307 Temporary Redirect","timestamp":"2026-01-23T22:02:38.764Z"}
28
-
29
- data: {"data":"INFO: 10.16.14.243:63556 - \"GET /laser/index.html HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:38.884Z"}
30
-
31
- data: {"data":"INFO: 10.16.14.243:63556 - \"GET /laser/js/init.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:38.978Z"}
32
-
33
- data: {"data":"INFO: 10.16.42.137:52271 - \"GET /laser/style.css HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:38.979Z"}
34
-
35
- data: {"data":"INFO: 10.16.14.243:32424 - \"GET /laser/js/core/config.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.056Z"}
36
-
37
- data: {"data":"INFO: 10.16.42.137:11947 - \"GET /laser/js/core/state.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.061Z"}
38
-
39
- data: {"data":"INFO: 10.16.42.137:38479 - \"GET /laser/js/core/utils.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.061Z"}
40
-
41
- data: {"data":"INFO: 10.16.14.243:21985 - \"GET /laser/js/core/physics.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.072Z"}
42
-
43
- data: {"data":"INFO: 10.16.42.137:61072 - \"GET /laser/js/core/video.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.073Z"}
44
-
45
- data: {"data":"INFO: 10.16.14.243:19017 - \"GET /laser/js/ui/logging.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.073Z"}
46
-
47
- data: {"data":"INFO: 10.16.42.137:8536 - \"GET /laser/js/core/hel.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.074Z"}
48
-
49
- data: {"data":"INFO: 10.16.14.243:55450 - \"GET /laser/js/ui/cards.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.075Z"}
50
-
51
- data: {"data":"INFO: 10.16.42.137:7723 - \"GET /laser/js/api/client.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.075Z"}
52
-
53
- data: {"data":"INFO: 10.16.14.243:61397 - \"GET /laser/js/ui/features.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.075Z"}
54
-
55
- data: {"data":"INFO: 10.16.14.243:47351 - \"GET /laser/js/core/tracker.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.075Z"}
56
-
57
- data: {"data":"INFO: 10.16.42.137:28176 - \"GET /laser/js/ui/overlays.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.076Z"}
58
-
59
- data: {"data":"INFO: 10.16.42.137:33696 - \"GET /laser/js/ui/radar.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.076Z"}
60
-
61
- data: {"data":"INFO: 10.16.14.243:60830 - \"GET /laser/js/main.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.076Z"}
62
-
63
- data: {"data":"INFO: 10.16.42.137:13066 - \"GET /laser/js/ui/cursor.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.077Z"}
64
-
65
- data: {"data":"INFO: 10.16.42.137:52984 - \"GET /laser/js/ui/intel.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.078Z"}
66
-
67
- data: {"data":"INFO: 10.16.14.243:33644 - \"GET /laser/js/ui/trade.js HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:02:39.080Z"}
68
-
69
- data: {"data":"INFO:root:Loading Hugging Face YOLOv8 weights spencercdz/YOLOv8m_defence/yolov8m_defence.pt onto cuda:0","timestamp":"2026-01-23T22:02:53.356Z"}
70
-
71
- data: {"data":"INFO:root:GPT Output for First Frame:","timestamp":"2026-01-23T22:03:01.919Z"}
72
-
73
- data: {"data":"{'T01': {'id': 'T01', 'distance_m': 300.0, 'direction': \"1 o'clock\", 'description': 'Helicopter in the upper right'}, 'T02': {'id': 'T02', 'distance_m': 350.0, 'direction': \"10 o'clock\", 'description': 'Helicopter in the upper left'}, 'T03': {'id': 'T03', 'distance_m': 280.0, 'direction': \"12 o'clock\", 'description': 'Helicopter in the center'}, 'T04': {'id': 'T04', 'distance_m': 320.0, 'direction': \"11 o'clock\", 'description': 'Helicopter slightly left of center'}}","timestamp":"2026-01-23T22:03:01.919Z"}
74
-
75
- data: {"data":"INFO: 10.16.14.243:35547 - \"POST /detect/async HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:01.926Z"}
76
-
77
- data: {"data":"INFO:root:Detection queries: ['person', 'car', 'truck', 'motorcycle', 'bicycle', 'bus', 'train', 'airplane']","timestamp":"2026-01-23T22:03:01.941Z"}
78
-
79
- data: {"data":"INFO:root:Detected 1 GPUs. Loading models in parallel...","timestamp":"2026-01-23T22:03:01.941Z"}
80
-
81
- data: {"data":"INFO:root:Loading Hugging Face YOLOv8 weights spencercdz/YOLOv8m_defence/yolov8m_defence.pt onto cuda:0","timestamp":"2026-01-23T22:03:01.942Z"}
82
-
83
- data: {"data":"INFO: 10.16.14.243:35547 - \"GET /detect/first-frame/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:02.031Z"}
84
-
85
- data: {"data":"INFO:root:Running GPT estimation for video start (Frame 0)...","timestamp":"2026-01-23T22:03:03.012Z"}
86
-
87
- data: {"data":"INFO: 10.16.14.243:37805 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:05.147Z"}
88
-
89
- data: {"data":"ERROR:root:Worker failed processing frame","timestamp":"2026-01-23T22:03:05.472Z"}
90
-
91
- data: {"data":"Traceback (most recent call last):","timestamp":"2026-01-23T22:03:05.472Z"}
92
-
93
- data: {"data":" File \"/app/inference.py\", line 1048, in flush_batch","timestamp":"2026-01-23T22:03:05.472Z"}
94
-
95
- data: {"data":" queue_out.put((idx, processed, detections), timeout=1.0)","timestamp":"2026-01-23T22:03:05.472Z"}
96
-
97
- data: {"data":" File \"/usr/local/lib/python3.10/queue.py\", line 148, in put","timestamp":"2026-01-23T22:03:05.472Z"}
98
-
99
- data: {"data":" raise Full","timestamp":"2026-01-23T22:03:05.472Z"}
100
-
101
- data: {"data":"queue.Full","timestamp":"2026-01-23T22:03:05.472Z"}
102
-
103
- data: {"data":"","timestamp":"2026-01-23T22:03:05.472Z"}
104
-
105
- data: {"data":"During handling of the above exception, another exception occurred:","timestamp":"2026-01-23T22:03:05.472Z"}
106
-
107
- data: {"data":"","timestamp":"2026-01-23T22:03:05.472Z"}
108
-
109
- data: {"data":"Traceback (most recent call last):","timestamp":"2026-01-23T22:03:05.472Z"}
110
-
111
- data: {"data":" File \"/app/inference.py\", line 1072, in worker_task","timestamp":"2026-01-23T22:03:05.472Z"}
112
-
113
- data: {"data":" flush_batch()","timestamp":"2026-01-23T22:03:05.472Z"}
114
-
115
- data: {"data":" File \"/app/inference.py\", line 1050, in flush_batch","timestamp":"2026-01-23T22:03:05.472Z"}
116
-
117
- data: {"data":" except Full:","timestamp":"2026-01-23T22:03:05.472Z"}
118
-
119
- data: {"data":"NameError: name 'Full' is not defined","timestamp":"2026-01-23T22:03:05.472Z"}
120
-
121
- data: {"data":"Exception in thread Thread-4 (worker_task):","timestamp":"2026-01-23T22:03:05.472Z"}
122
-
123
- data: {"data":"Traceback (most recent call last):","timestamp":"2026-01-23T22:03:05.472Z"}
124
-
125
- data: {"data":" File \"/app/inference.py\", line 1048, in flush_batch","timestamp":"2026-01-23T22:03:05.472Z"}
126
-
127
- data: {"data":" queue_out.put((idx, processed, detections), timeout=1.0)","timestamp":"2026-01-23T22:03:05.472Z"}
128
-
129
- data: {"data":" File \"/usr/local/lib/python3.10/queue.py\", line 148, in put","timestamp":"2026-01-23T22:03:05.472Z"}
130
-
131
- data: {"data":" raise Full","timestamp":"2026-01-23T22:03:05.472Z"}
132
-
133
- data: {"data":"queue.Full","timestamp":"2026-01-23T22:03:05.472Z"}
134
-
135
- data: {"data":"","timestamp":"2026-01-23T22:03:05.472Z"}
136
-
137
- data: {"data":"During handling of the above exception, another exception occurred:","timestamp":"2026-01-23T22:03:05.472Z"}
138
-
139
- data: {"data":"","timestamp":"2026-01-23T22:03:05.472Z"}
140
-
141
- data: {"data":"Traceback (most recent call last):","timestamp":"2026-01-23T22:03:05.472Z"}
142
-
143
- data: {"data":" File \"/usr/local/lib/python3.10/threading.py\", line 1016, in _bootstrap_inner","timestamp":"2026-01-23T22:03:05.472Z"}
144
-
145
- data: {"data":" self.run()","timestamp":"2026-01-23T22:03:05.472Z"}
146
-
147
- data: {"data":" File \"/usr/local/lib/python3.10/threading.py\", line 953, in run","timestamp":"2026-01-23T22:03:05.472Z"}
148
-
149
- data: {"data":" self._target(*self._args, **self._kwargs)","timestamp":"2026-01-23T22:03:05.472Z"}
150
-
151
- data: {"data":" File \"/app/inference.py\", line 1072, in worker_task","timestamp":"2026-01-23T22:03:05.473Z"}
152
-
153
- data: {"data":" flush_batch()","timestamp":"2026-01-23T22:03:05.473Z"}
154
-
155
- data: {"data":" File \"/app/inference.py\", line 1050, in flush_batch","timestamp":"2026-01-23T22:03:05.473Z"}
156
-
157
- data: {"data":" except Full:","timestamp":"2026-01-23T22:03:05.473Z"}
158
-
159
- data: {"data":"NameError: name 'Full' is not defined","timestamp":"2026-01-23T22:03:05.473Z"}
160
-
161
- data: {"data":"INFO: 10.16.14.243:37805 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:08.144Z"}
162
-
163
- data: {"data":"ERROR:root:Workers stopped unexpectedly.","timestamp":"2026-01-23T22:03:09.919Z"}
164
-
165
- data: {"data":"INFO: 10.16.14.243:37805 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:11.146Z"}
166
-
167
- data: {"data":"INFO: 10.16.14.243:37805 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:14.146Z"}
168
-
169
- data: {"data":"INFO: 10.16.14.243:37805 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:17.147Z"}
170
-
171
- data: {"data":"INFO: 10.16.42.137:24819 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:20.146Z"}
172
-
173
- data: {"data":"INFO: 10.16.14.243:44429 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:23.148Z"}
174
-
175
- data: {"data":"INFO: 10.16.42.137:53311 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:26.145Z"}
176
-
177
- data: {"data":"INFO: 10.16.42.137:53311 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:29.146Z"}
178
-
179
- data: {"data":"INFO: 10.16.42.137:53862 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:32.152Z"}
180
-
181
- data: {"data":"INFO: 10.16.14.243:19151 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:35.152Z"}
182
-
183
- data: {"data":"INFO: 10.16.42.137:55390 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:38.148Z"}
184
-
185
- data: {"data":"INFO: 10.16.42.137:55390 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:41.150Z"}
186
-
187
- data: {"data":"INFO: 10.16.14.243:57552 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:44.145Z"}
188
-
189
- data: {"data":"INFO: 10.16.42.137:40211 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:47.147Z"}
190
-
191
- data: {"data":"INFO: 10.16.42.137:40211 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:50.145Z"}
192
-
193
- data: {"data":"INFO: 10.16.14.243:65208 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:53.153Z"}
194
-
195
- data: {"data":"INFO: 10.16.14.243:65208 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:56.149Z"}
196
-
197
- data: {"data":"INFO: 10.16.42.137:40007 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:03:59.150Z"}
198
-
199
- data: {"data":"INFO: 10.16.42.137:40007 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:02.144Z"}
200
-
201
- data: {"data":"INFO: 10.16.14.243:12635 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:05.147Z"}
202
-
203
- data: {"data":"INFO: 10.16.14.243:12584 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:08.151Z"}
204
-
205
- data: {"data":"INFO: 10.16.42.137:20413 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:11.149Z"}
206
-
207
- data: {"data":"INFO: 10.16.42.137:20413 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:14.150Z"}
208
-
209
- data: {"data":"INFO: 10.16.14.243:38842 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:17.148Z"}
210
-
211
- data: {"data":"INFO: 10.16.14.243:38842 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:20.150Z"}
212
-
213
- data: {"data":"INFO: 10.16.14.243:38842 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:23.145Z"}
214
-
215
- data: {"data":"INFO: 10.16.14.243:7310 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:26.151Z"}
216
-
217
- data: {"data":"INFO: 10.16.42.137:30457 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:29.152Z"}
218
-
219
- data: {"data":"INFO: 10.16.14.243:50162 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:32.148Z"}
220
-
221
- data: {"data":"INFO: 10.16.14.243:50162 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:35.147Z"}
222
-
223
- data: {"data":"INFO: 10.16.14.243:45255 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:38.152Z"}
224
-
225
- data: {"data":"INFO: 10.16.14.243:45255 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:41.145Z"}
226
-
227
- data: {"data":"INFO: 10.16.42.137:61679 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:44.150Z"}
228
-
229
- data: {"data":"INFO: 10.16.42.137:61679 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:47.148Z"}
230
-
231
- data: {"data":"INFO: 10.16.42.137:37407 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:50.156Z"}
232
-
233
- data: {"data":"INFO: 10.16.42.137:37407 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:53.155Z"}
234
-
235
- data: {"data":"INFO: 10.16.14.243:12888 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:56.146Z"}
236
-
237
- data: {"data":"INFO: 10.16.42.137:54774 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:04:59.151Z"}
238
-
239
- data: {"data":"INFO: 10.16.14.243:13189 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:02.153Z"}
240
-
241
- data: {"data":"INFO: 10.16.42.137:7369 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:05.150Z"}
242
-
243
- data: {"data":"INFO: 10.16.14.243:44904 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:08.151Z"}
244
-
245
- data: {"data":"INFO: 10.16.14.243:44904 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:11.146Z"}
246
-
247
- data: {"data":"INFO: 10.16.14.243:44904 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:14.146Z"}
248
-
249
- data: {"data":"INFO: 10.16.42.137:38667 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:17.148Z"}
250
-
251
- data: {"data":"INFO: 10.16.14.243:17731 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:20.149Z"}
252
-
253
- data: {"data":"INFO: 10.16.14.243:17731 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:23.147Z"}
254
-
255
- data: {"data":"INFO: 10.16.42.137:27197 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:26.152Z"}
256
-
257
- data: {"data":"INFO: 10.16.42.137:27197 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:29.146Z"}
258
-
259
- data: {"data":"INFO: 10.16.14.243:61177 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:32.148Z"}
260
-
261
- data: {"data":"INFO: 10.16.14.243:61177 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:35.149Z"}
262
-
263
- data: {"data":"INFO: 10.16.14.243:61177 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:38.150Z"}
264
-
265
- data: {"data":"INFO: 10.16.14.243:61177 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:41.146Z"}
266
-
267
- data: {"data":"INFO: 10.16.14.243:61177 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:44.147Z"}
268
-
269
- data: {"data":"INFO: 10.16.14.243:61177 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:47.147Z"}
270
-
271
- data: {"data":"INFO: 10.16.14.243:61177 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:50.146Z"}
272
-
273
- data: {"data":"INFO: 10.16.14.243:61177 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:53.146Z"}
274
-
275
- data: {"data":"INFO: 10.16.14.243:61177 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:56.147Z"}
276
-
277
- data: {"data":"INFO: 10.16.42.137:41705 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:05:59.152Z"}
278
-
279
- data: {"data":"INFO: 10.16.42.137:41705 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:02.149Z"}
280
-
281
- data: {"data":"INFO: 10.16.14.243:51116 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:05.147Z"}
282
-
283
- data: {"data":"INFO: 10.16.14.243:51116 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:08.147Z"}
284
-
285
- data: {"data":"INFO: 10.16.42.137:59918 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:11.148Z"}
286
-
287
- data: {"data":"INFO: 10.16.42.137:37252 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:14.153Z"}
288
-
289
- data: {"data":"INFO: 10.16.14.243:49604 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:17.152Z"}
290
-
291
- data: {"data":"INFO: 10.16.42.137:24055 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:20.148Z"}
292
-
293
- data: {"data":"INFO: 10.16.42.137:24055 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:23.148Z"}
294
-
295
- data: {"data":"INFO: 10.16.14.243:24858 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:26.149Z"}
296
-
297
- data: {"data":"INFO: 10.16.42.137:65148 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:29.152Z"}
298
-
299
- data: {"data":"INFO: 10.16.14.243:29064 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:32.154Z"}
300
-
301
- data: {"data":"INFO: 10.16.14.243:29064 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:35.151Z"}
302
-
303
- data: {"data":"INFO: 10.16.42.137:44620 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:38.152Z"}
304
-
305
- data: {"data":"INFO: 10.16.14.243:46299 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:41.149Z"}
306
-
307
- data: {"data":"INFO: 10.16.42.137:8876 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:44.150Z"}
308
-
309
- data: {"data":"INFO: 10.16.42.137:8876 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:47.149Z"}
310
-
311
- data: {"data":"INFO: 10.16.42.137:8876 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:50.149Z"}
312
-
313
- data: {"data":"INFO: 10.16.42.137:8876 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:53.147Z"}
314
-
315
- data: {"data":"INFO: 10.16.42.137:14006 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:56.154Z"}
316
-
317
- data: {"data":"INFO: 10.16.14.243:55621 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:06:59.153Z"}
318
-
319
- data: {"data":"INFO: 10.16.14.243:55621 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:02.151Z"}
320
-
321
- data: {"data":"INFO: 10.16.14.243:19848 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:05.159Z"}
322
-
323
- data: {"data":"INFO: 10.16.42.137:12980 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:08.151Z"}
324
-
325
- data: {"data":"INFO: 10.16.42.137:12980 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:11.152Z"}
326
-
327
- data: {"data":"INFO: 10.16.14.243:27976 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:14.155Z"}
328
-
329
- data: {"data":"INFO: 10.16.42.137:50949 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:17.154Z"}
330
-
331
- data: {"data":"INFO: 10.16.14.243:47930 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:20.155Z"}
332
-
333
- data: {"data":"INFO: 10.16.42.137:26741 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:23.154Z"}
334
-
335
- data: {"data":"INFO: 10.16.14.243:45298 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:26.150Z"}
336
-
337
- data: {"data":"INFO: 10.16.14.243:45298 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:29.151Z"}
338
-
339
- data: {"data":"INFO: 10.16.14.243:45298 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:32.153Z"}
340
-
341
- data: {"data":"INFO: 10.16.14.243:45298 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:35.148Z"}
342
-
343
- data: {"data":"INFO: 10.16.14.243:63025 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:38.159Z"}
344
-
345
- data: {"data":"INFO: 10.16.42.137:38082 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:41.153Z"}
346
-
347
- data: {"data":"INFO: 10.16.42.137:38082 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:44.149Z"}
348
-
349
- data: {"data":"INFO: 10.16.14.243:53673 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:47.152Z"}
350
-
351
- data: {"data":"INFO: 10.16.14.243:5029 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:50.414Z"}
352
-
353
- data: {"data":"INFO: 10.16.42.137:21004 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:53.415Z"}
354
-
355
- data: {"data":"INFO: 10.16.42.137:21004 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:56.415Z"}
356
-
357
- data: {"data":"INFO: 10.16.14.243:18558 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:07:59.415Z"}
358
-
359
- data: {"data":"INFO: 10.16.42.137:21979 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:02.416Z"}
360
-
361
- data: {"data":"INFO: 10.16.14.243:4153 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:05.414Z"}
362
-
363
- data: {"data":"INFO: 10.16.42.137:47440 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:08.415Z"}
364
-
365
- data: {"data":"INFO: 10.16.42.137:47440 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:11.412Z"}
366
-
367
- data: {"data":"INFO: 10.16.14.243:51902 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:14.416Z"}
368
-
369
- data: {"data":"INFO: 10.16.42.137:50678 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:17.416Z"}
370
-
371
- data: {"data":"INFO: 10.16.42.137:50678 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:20.411Z"}
372
-
373
- data: {"data":"INFO: 10.16.14.243:24592 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:23.410Z"}
374
-
375
- data: {"data":"INFO: 10.16.14.243:24592 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:26.409Z"}
376
-
377
- data: {"data":"INFO: 10.16.14.243:43041 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:29.415Z"}
378
-
379
- data: {"data":"INFO: 10.16.14.243:13327 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:32.422Z"}
380
-
381
- data: {"data":"INFO: 10.16.42.137:48743 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:35.421Z"}
382
-
383
- data: {"data":"INFO: 10.16.14.243:64507 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:38.412Z"}
384
-
385
- data: {"data":"INFO: 10.16.14.243:29935 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:41.493Z"}
386
-
387
- data: {"data":"INFO: 10.16.14.243:29935 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:44.419Z"}
388
-
389
- data: {"data":"INFO: 10.16.14.243:12182 - \"GET /detect/status/720717af953045d5816689f0b22bbecd HTTP/1.1\" 200 OK","timestamp":"2026-01-23T22:08:47.428Z"}
390
-
391
- data: {"data":"INFO: Shutting down","timestamp":"2026-01-23T22:08:55.311Z"}
392
-
393
- data: {"data":"INFO: Waiting for application shutdown.","timestamp":"2026-01-23T22:08:55.411Z"}
394
-
395
- data: {"data":"INFO: Application shutdown complete.","timestamp":"2026-01-23T22:08:55.412Z"}
396
-
397
- data: {"data":"INFO: Finished server process [1]","timestamp":"2026-01-23T22:08:55.412Z"}
398
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
inference.py CHANGED
@@ -1642,7 +1642,7 @@ def run_grounded_sam2_tracking(
1642
  from PIL import Image as PILImage
1643
 
1644
  from utils.video import extract_frames_to_jpeg_dir
1645
- from models.segmenters.grounded_sam2 import MaskDictionary, ObjectInfo, SegmentOutput
1646
 
1647
  active_segmenter = segmenter_name or "gsam2_large"
1648
  logging.info(
@@ -1691,6 +1691,10 @@ def run_grounded_sam2_tracking(
1691
  break
1692
  fidx, fobjs = item
1693
  try:
 
 
 
 
1694
  if _perf_metrics is not None:
1695
  _t_r = time.perf_counter()
1696
 
@@ -2064,12 +2068,23 @@ def run_grounded_sam2_tracking(
2064
  _t_track = time.perf_counter()
2065
 
2066
  def _feed_segment(seg_frames):
 
2067
  for fidx in sorted(seg_frames.keys()):
2068
  render_in.put((fidx, seg_frames[fidx]))
2069
 
 
 
 
 
 
 
 
 
 
2070
  tracking_results = segmenter.process_video(
2071
  frame_dir, frame_names, queries,
2072
  on_segment=_feed_segment,
 
2073
  )
2074
 
2075
  if _perf_metrics is not None:
@@ -2197,7 +2212,7 @@ def run_grounded_sam2_tracking(
2197
 
2198
  mask_dict = MaskDictionary()
2199
  mask_dict.add_new_frame_annotation(
2200
- mask_list=torch.tensor(masks).to(seg.device),
2201
  box_list=(
2202
  boxes.clone()
2203
  if torch.is_tensor(boxes)
@@ -2325,32 +2340,13 @@ def run_grounded_sam2_tracking(
2325
  next_seg_idx += 1
2326
  continue
2327
 
2328
- # Bulk CPU transfer: 3 CUDA syncs total (was 100+ per-mask syncs)
2329
- segment_results = segment_output.to_object_dicts()
2330
-
2331
- # Apply remapping to every frame in this segment
2332
- for frame_idx, frame_objects in segment_results.items():
2333
- remapped: Dict[int, ObjectInfo] = {}
2334
- for local_id, obj_info in frame_objects.items():
2335
- global_id = remapping.get(local_id)
2336
- if global_id is None:
2337
- continue
2338
- remapped[global_id] = ObjectInfo(
2339
- instance_id=global_id,
2340
- mask=obj_info.mask,
2341
- class_name=obj_info.class_name,
2342
- x1=obj_info.x1, y1=obj_info.y1,
2343
- x2=obj_info.x2, y2=obj_info.y2,
2344
- )
2345
- tracking_results[frame_idx] = remapped
2346
-
2347
- if _perf_metrics is not None:
2348
- _recon_accum_ms += (time.perf_counter() - _t_rc) * 1000.0
2349
-
2350
- # Update running tracker with last frame of this segment
2351
- if segment_results:
2352
- last_fi = max(segment_results.keys())
2353
- last_objs = tracking_results.get(last_fi, {})
2354
  sam2_masks = MaskDictionary()
2355
  sam2_masks.labels = copy.deepcopy(last_objs)
2356
  if last_objs:
@@ -2364,9 +2360,18 @@ def run_grounded_sam2_tracking(
2364
  m.shape[-1] if m.ndim >= 2 else 0
2365
  )
2366
 
2367
- # Feed reconciled frames to render immediately
2368
- for fi in range(start_idx, min(start_idx + step, total_frames)):
2369
- render_in.put((fi, tracking_results.get(fi, {})))
 
 
 
 
 
 
 
 
 
2370
 
2371
  next_seg_idx += 1
2372
 
 
1642
  from PIL import Image as PILImage
1643
 
1644
  from utils.video import extract_frames_to_jpeg_dir
1645
+ from models.segmenters.grounded_sam2 import MaskDictionary, ObjectInfo, LazyFrameObjects
1646
 
1647
  active_segmenter = segmenter_name or "gsam2_large"
1648
  logging.info(
 
1691
  break
1692
  fidx, fobjs = item
1693
  try:
1694
+ # Deferred GPU->CPU: materialize lazy objects in render thread
1695
+ if isinstance(fobjs, LazyFrameObjects):
1696
+ fobjs = fobjs.materialize()
1697
+
1698
  if _perf_metrics is not None:
1699
  _t_r = time.perf_counter()
1700
 
 
2068
  _t_track = time.perf_counter()
2069
 
2070
  def _feed_segment(seg_frames):
2071
+ """Fallback for empty/carry-forward segments (already CPU)."""
2072
  for fidx in sorted(seg_frames.keys()):
2073
  render_in.put((fidx, seg_frames[fidx]))
2074
 
2075
+ def _feed_segment_gpu(segment_output):
2076
+ """Feed LazyFrameObjects into render_in (GPU->CPU deferred)."""
2077
+ # Deduplicate: frame_indices has one entry per (frame, obj)
2078
+ seen = set()
2079
+ for fi in segment_output.frame_indices:
2080
+ if fi not in seen:
2081
+ seen.add(fi)
2082
+ render_in.put((fi, LazyFrameObjects(segment_output, fi)))
2083
+
2084
  tracking_results = segmenter.process_video(
2085
  frame_dir, frame_names, queries,
2086
  on_segment=_feed_segment,
2087
+ on_segment_output=_feed_segment_gpu,
2088
  )
2089
 
2090
  if _perf_metrics is not None:
 
2212
 
2213
  mask_dict = MaskDictionary()
2214
  mask_dict.add_new_frame_annotation(
2215
+ mask_list=masks,
2216
  box_list=(
2217
  boxes.clone()
2218
  if torch.is_tensor(boxes)
 
2340
  next_seg_idx += 1
2341
  continue
2342
 
2343
+ # Materialize ONLY the last frame for IoU tracking continuity
2344
+ last_fi = segment_output.last_frame_idx()
2345
+ if last_fi is not None:
2346
+ last_objs = segment_output.frame_to_object_dict(
2347
+ last_fi, remapping=remapping, to_cpu=True,
2348
+ )
2349
+ tracking_results[last_fi] = last_objs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2350
  sam2_masks = MaskDictionary()
2351
  sam2_masks.labels = copy.deepcopy(last_objs)
2352
  if last_objs:
 
2360
  m.shape[-1] if m.ndim >= 2 else 0
2361
  )
2362
 
2363
+ if _perf_metrics is not None:
2364
+ _recon_accum_ms += (time.perf_counter() - _t_rc) * 1000.0
2365
+
2366
+ # Feed LazyFrameObjects to render — GPU->CPU deferred to render workers
2367
+ seen_fi: set = set()
2368
+ for fi in segment_output.frame_indices:
2369
+ if fi not in seen_fi:
2370
+ seen_fi.add(fi)
2371
+ render_in.put((
2372
+ fi,
2373
+ LazyFrameObjects(segment_output, fi, remapping),
2374
+ ))
2375
 
2376
  next_seg_idx += 1
2377
 
models/segmenters/grounded_sam2.py CHANGED
@@ -37,55 +37,6 @@ class ObjectInfo:
37
  x2: int = 0
38
  y2: int = 0
39
 
40
- def update_box(self):
41
- """Derive bounding box from mask (GPU-native, minimal sync)."""
42
- if self.mask is None:
43
- return
44
- mask = self.mask
45
- if not torch.is_tensor(mask):
46
- mask = torch.as_tensor(mask)
47
-
48
- rows = mask.any(dim=1) # (H,) — which rows have any True
49
- cols = mask.any(dim=0) # (W,) — which cols have any True
50
-
51
- if not rows.any():
52
- return
53
-
54
- rows_f = rows.float()
55
- cols_f = cols.float()
56
- H, W = mask.shape[-2], mask.shape[-1]
57
-
58
- bbox = torch.stack([
59
- cols_f.argmax(),
60
- rows_f.argmax(),
61
- W - 1 - cols_f.flip(0).argmax(),
62
- H - 1 - rows_f.flip(0).argmax(),
63
- ])
64
- x1, y1, x2, y2 = bbox.tolist()
65
- self.x1 = int(x1)
66
- self.y1 = int(y1)
67
- self.x2 = int(x2)
68
- self.y2 = int(y2)
69
-
70
- @staticmethod
71
- def batch_bbox(masks: torch.Tensor):
72
- """Compute bboxes for (N, H, W) bool masks. Returns (N,4) cpu int, (N,) cpu bool."""
73
- N, H, W = masks.shape
74
- rows = masks.any(dim=2) # (N, H)
75
- cols = masks.any(dim=1) # (N, W)
76
- valid = rows.any(dim=1) # (N,)
77
-
78
- rows_f = rows.float()
79
- cols_f = cols.float()
80
-
81
- y_mins = rows_f.argmax(dim=1)
82
- y_maxs = H - 1 - rows_f.flip(1).argmax(dim=1)
83
- x_mins = cols_f.argmax(dim=1)
84
- x_maxs = W - 1 - cols_f.flip(1).argmax(dim=1)
85
-
86
- bboxes = torch.stack([x_mins, y_mins, x_maxs, y_maxs], dim=1)
87
- return bboxes.cpu(), valid.cpu()
88
-
89
 
90
  @dataclass
91
  class MaskDictionary:
@@ -261,29 +212,89 @@ class SegmentOutput:
261
  class_names: List[str] # len == count
262
  device: str = "cpu"
263
 
264
- def to_object_dicts(self) -> Dict[int, Dict[int, "ObjectInfo"]]:
265
- """Bulk CPU transfer + ObjectInfo construction. 3 CUDA syncs total."""
266
- if self.masks.numel() == 0:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  return {}
268
- masks_cpu = self.masks.cpu() # sync 1
269
- bboxes_cpu = self.bboxes.cpu() # sync 2
270
- valid_cpu = self.valid.cpu() # sync 3
271
- result: Dict[int, Dict[int, ObjectInfo]] = {}
272
- for i in range(masks_cpu.shape[0]):
273
- fi, oid, cn = self.frame_indices[i], self.obj_ids[i], self.class_names[i]
274
- if valid_cpu[i]:
275
- x1, y1, x2, y2 = int(bboxes_cpu[i, 0]), int(bboxes_cpu[i, 1]), int(bboxes_cpu[i, 2]), int(bboxes_cpu[i, 3])
 
 
 
 
 
 
 
 
 
 
 
276
  else:
 
 
277
  x1 = y1 = x2 = y2 = 0
278
- info = ObjectInfo(
279
- instance_id=oid, mask=masks_cpu[i],
280
- class_name=cn, x1=x1, y1=y1, x2=x2, y2=y2,
 
 
 
281
  )
282
- result.setdefault(fi, {})[oid] = info
283
  return result
284
 
285
- def last_frame_idx(self) -> Optional[int]:
286
- return self.frame_indices[-1] if self.frame_indices else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
 
289
  # ---------------------------------------------------------------------------
@@ -421,6 +432,45 @@ class GroundedSAM2Segmenter(Segmenter):
421
  )
422
  logging.info("num_maskmem changed from %d to %d", old, num_maskmem)
423
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
  # -- Single-frame interface (Segmenter.predict) -------------------------
425
 
426
  def predict(
@@ -488,7 +538,7 @@ class GroundedSAM2Segmenter(Segmenter):
488
  self,
489
  image: "Image",
490
  text_prompts: List[str],
491
- ) -> Tuple[Optional[np.ndarray], Optional[torch.Tensor], List[str]]:
492
  """Run GDINO + SAM2 image predictor on a single keyframe.
493
 
494
  Args:
@@ -497,9 +547,9 @@ class GroundedSAM2Segmenter(Segmenter):
497
 
498
  Returns:
499
  ``(masks, boxes, labels)`` where *masks* is an ``(N, H, W)``
500
- numpy array, *boxes* is an ``(N, 4)`` tensor on device, and
501
- *labels* is a list of strings. Returns ``(None, None, [])``
502
- when no objects are detected.
503
  """
504
  self._ensure_models_loaded()
505
  _pm = getattr(self, '_perf_metrics', None)
@@ -547,12 +597,7 @@ class GroundedSAM2Segmenter(Segmenter):
547
  _t1 = time.perf_counter()
548
 
549
  self._image_predictor.set_image(np.array(image))
550
- masks, scores, logits = self._image_predictor.predict(
551
- point_coords=None,
552
- point_labels=None,
553
- box=input_boxes,
554
- multimask_output=False,
555
- )
556
 
557
  if _pm is not None:
558
  _pl = getattr(self, '_perf_lock', None)
@@ -562,11 +607,6 @@ class GroundedSAM2Segmenter(Segmenter):
562
  else:
563
  _pm["sam_image_total_ms"] += _d
564
 
565
- if masks.ndim == 2:
566
- masks = masks[None]
567
- elif masks.ndim == 4:
568
- masks = masks.squeeze(1)
569
-
570
  return masks, input_boxes, det_labels
571
 
572
  def propagate_segment(
@@ -579,7 +619,7 @@ class GroundedSAM2Segmenter(Segmenter):
579
  """Propagate masks for a single segment via SAM2 video predictor.
580
 
581
  Returns a GPU-resident ``SegmentOutput`` with zero CUDA syncs.
582
- Call ``output.to_object_dicts()`` to materialize CPU ObjectInfo dicts.
583
  """
584
  _pm = getattr(self, '_perf_metrics', None)
585
  if _pm is not None:
@@ -674,6 +714,7 @@ class GroundedSAM2Segmenter(Segmenter):
674
  frame_names: List[str],
675
  text_prompts: List[str],
676
  on_segment: Optional[Callable[[Dict[int, Dict[int, "ObjectInfo"]]], None]] = None,
 
677
  ) -> Dict[int, Dict[int, ObjectInfo]]:
678
  """Run full Grounded-SAM-2 tracking pipeline on extracted JPEG frames.
679
 
@@ -801,12 +842,7 @@ class GroundedSAM2Segmenter(Segmenter):
801
  _t_si = time.perf_counter()
802
 
803
  self._image_predictor.set_image(np.array(image))
804
- masks, scores, logits = self._image_predictor.predict(
805
- point_coords=None,
806
- point_labels=None,
807
- box=input_boxes,
808
- multimask_output=False,
809
- )
810
 
811
  if _pm is not None:
812
  _pl = getattr(self, '_perf_lock', None)
@@ -816,16 +852,8 @@ class GroundedSAM2Segmenter(Segmenter):
816
  else:
817
  _pm["sam_image_total_ms"] += _d
818
 
819
- # Normalize mask dims
820
- if masks.ndim == 2:
821
- masks = masks[None]
822
- scores = scores[None]
823
- logits = logits[None]
824
- elif masks.ndim == 4:
825
- masks = masks.squeeze(1)
826
-
827
  mask_dict.add_new_frame_annotation(
828
- mask_list=torch.tensor(masks).to(device),
829
  box_list=input_boxes.clone() if torch.is_tensor(input_boxes) else torch.tensor(input_boxes),
830
  label_list=det_labels,
831
  )
@@ -863,22 +891,23 @@ class GroundedSAM2Segmenter(Segmenter):
863
  segment_output = self.propagate_segment(
864
  inference_state, start_idx, mask_dict, step,
865
  )
866
- segment_results = segment_output.to_object_dicts()
867
 
868
- if segment_results:
869
- all_results.update(segment_results)
870
- if on_segment:
871
- on_segment(segment_results)
872
- last_fi = segment_output.last_frame_idx()
873
- if last_fi is not None:
874
- last_frame_objects = all_results.get(last_fi, {})
875
- sam2_masks = MaskDictionary()
876
- sam2_masks.labels = copy.deepcopy(last_frame_objects)
877
- if last_frame_objects:
878
- first_info = next(iter(last_frame_objects.values()))
879
- if first_info.mask is not None:
880
- sam2_masks.mask_height = first_info.mask.shape[-2] if first_info.mask.ndim >= 2 else 0
881
- sam2_masks.mask_width = first_info.mask.shape[-1] if first_info.mask.ndim >= 2 else 0
 
 
882
 
883
  logging.info(
884
  "Grounded-SAM-2 tracking complete: %d frames, %d tracked objects",
 
37
  x2: int = 0
38
  y2: int = 0
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  @dataclass
42
  class MaskDictionary:
 
212
  class_names: List[str] # len == count
213
  device: str = "cpu"
214
 
215
+ def last_frame_idx(self) -> Optional[int]:
216
+ return self.frame_indices[-1] if self.frame_indices else None
217
+
218
+ def frame_to_object_dict(
219
+ self,
220
+ frame_idx: int,
221
+ remapping: Optional[Dict[int, int]] = None,
222
+ to_cpu: bool = True,
223
+ ) -> Dict[int, "ObjectInfo"]:
224
+ """Materialize a single frame's ObjectInfo dict from GPU buffers.
225
+
226
+ Args:
227
+ frame_idx: The frame index to materialize.
228
+ remapping: Optional local->global ID mapping.
229
+ to_cpu: If True, transfer mask/bbox to CPU.
230
+
231
+ Returns:
232
+ ``{obj_id: ObjectInfo}`` for the requested frame.
233
+ """
234
+ # Build lazy frame index on first call
235
+ if not hasattr(self, '_frame_index'):
236
+ idx: Dict[int, List[int]] = {}
237
+ for i, fi in enumerate(self.frame_indices):
238
+ idx.setdefault(fi, []).append(i)
239
+ self._frame_index = idx
240
+
241
+ positions = self._frame_index.get(frame_idx)
242
+ if not positions:
243
  return {}
244
+
245
+ result: Dict[int, ObjectInfo] = {}
246
+ for i in positions:
247
+ oid = self.obj_ids[i]
248
+ cn = self.class_names[i]
249
+ global_id = remapping.get(oid, oid) if remapping else oid
250
+ mask = self.masks[i]
251
+ valid = self.valid[i]
252
+ if valid:
253
+ bbox = self.bboxes[i]
254
+ if to_cpu:
255
+ mask = mask.cpu()
256
+ x1 = int(bbox[0].item())
257
+ y1 = int(bbox[1].item())
258
+ x2 = int(bbox[2].item())
259
+ y2 = int(bbox[3].item())
260
+ else:
261
+ x1, y1 = int(bbox[0]), int(bbox[1])
262
+ x2, y2 = int(bbox[2]), int(bbox[3])
263
  else:
264
+ if to_cpu:
265
+ mask = mask.cpu()
266
  x1 = y1 = x2 = y2 = 0
267
+
268
+ result[global_id] = ObjectInfo(
269
+ instance_id=global_id,
270
+ mask=mask,
271
+ class_name=cn,
272
+ x1=x1, y1=y1, x2=x2, y2=y2,
273
  )
 
274
  return result
275
 
276
+
277
+ # ---------------------------------------------------------------------------
278
+ # Lazy frame objects wrapper (deferred GPU->CPU per-frame)
279
+ # ---------------------------------------------------------------------------
280
+
281
+ @dataclass
282
+ class LazyFrameObjects:
283
+ """Lightweight wrapper for deferred GPU->CPU materialization.
284
+
285
+ Holds a reference to a GPU-resident ``SegmentOutput`` plus frame index
286
+ and optional ID remapping. Call ``materialize()`` to perform the
287
+ GPU->CPU transfer (intended to run in a render worker thread).
288
+ """
289
+ segment_output: SegmentOutput
290
+ frame_idx: int
291
+ remapping: Optional[Dict[int, int]] = None
292
+
293
+ def materialize(self) -> Dict[int, "ObjectInfo"]:
294
+ """Transfer one frame's data from GPU to CPU and build ObjectInfo dict."""
295
+ return self.segment_output.frame_to_object_dict(
296
+ self.frame_idx, remapping=self.remapping, to_cpu=True,
297
+ )
298
 
299
 
300
  # ---------------------------------------------------------------------------
 
432
  )
433
  logging.info("num_maskmem changed from %d to %d", old, num_maskmem)
434
 
435
+ # -- GPU-resident SAM2 predict (skip numpy conversion) ------------------
436
+
437
+ def _predict_masks_gpu(
438
+ self, input_boxes: torch.Tensor,
439
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
440
+ """Run SAM2 image predictor keeping masks on GPU (skip numpy conversion).
441
+
442
+ Calls SAM2's internal ``_prep_prompts`` + ``_predict`` directly,
443
+ bypassing the public ``predict()`` which converts to numpy.
444
+
445
+ Args:
446
+ input_boxes: (N, 4) float tensor on device.
447
+
448
+ Returns:
449
+ ``(masks, scores)`` — *masks* is ``(N, H, W)`` bool GPU tensor,
450
+ *scores* is ``(N,)`` float GPU tensor.
451
+ """
452
+ mask_input, unnorm_coords, labels, unnorm_box = (
453
+ self._image_predictor._prep_prompts(
454
+ point_coords=None,
455
+ point_labels=None,
456
+ box=input_boxes,
457
+ mask_input=None,
458
+ normalize_coords=True,
459
+ )
460
+ )
461
+ masks, scores, _ = self._image_predictor._predict(
462
+ unnorm_coords, labels, unnorm_box, mask_input,
463
+ multimask_output=False, return_logits=False,
464
+ )
465
+ # _predict returns (1, N, ..., H, W); squeeze batch dim
466
+ masks = masks.squeeze(0)
467
+ if masks.ndim == 2:
468
+ masks = masks[None]
469
+ elif masks.ndim == 4:
470
+ masks = masks.squeeze(1)
471
+ scores = scores.squeeze(0).flatten()
472
+ return masks, scores
473
+
474
  # -- Single-frame interface (Segmenter.predict) -------------------------
475
 
476
  def predict(
 
538
  self,
539
  image: "Image",
540
  text_prompts: List[str],
541
+ ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], List[str]]:
542
  """Run GDINO + SAM2 image predictor on a single keyframe.
543
 
544
  Args:
 
547
 
548
  Returns:
549
  ``(masks, boxes, labels)`` where *masks* is an ``(N, H, W)``
550
+ bool GPU tensor, *boxes* is an ``(N, 4)`` tensor on device,
551
+ and *labels* is a list of strings. Returns
552
+ ``(None, None, [])`` when no objects are detected.
553
  """
554
  self._ensure_models_loaded()
555
  _pm = getattr(self, '_perf_metrics', None)
 
597
  _t1 = time.perf_counter()
598
 
599
  self._image_predictor.set_image(np.array(image))
600
+ masks, scores = self._predict_masks_gpu(input_boxes)
 
 
 
 
 
601
 
602
  if _pm is not None:
603
  _pl = getattr(self, '_perf_lock', None)
 
607
  else:
608
  _pm["sam_image_total_ms"] += _d
609
 
 
 
 
 
 
610
  return masks, input_boxes, det_labels
611
 
612
  def propagate_segment(
 
619
  """Propagate masks for a single segment via SAM2 video predictor.
620
 
621
  Returns a GPU-resident ``SegmentOutput`` with zero CUDA syncs.
622
+ Call ``output.frame_to_object_dict()`` to materialize per-frame CPU dicts.
623
  """
624
  _pm = getattr(self, '_perf_metrics', None)
625
  if _pm is not None:
 
714
  frame_names: List[str],
715
  text_prompts: List[str],
716
  on_segment: Optional[Callable[[Dict[int, Dict[int, "ObjectInfo"]]], None]] = None,
717
+ on_segment_output: Optional[Callable[["SegmentOutput"], None]] = None,
718
  ) -> Dict[int, Dict[int, ObjectInfo]]:
719
  """Run full Grounded-SAM-2 tracking pipeline on extracted JPEG frames.
720
 
 
842
  _t_si = time.perf_counter()
843
 
844
  self._image_predictor.set_image(np.array(image))
845
+ masks, scores = self._predict_masks_gpu(input_boxes)
 
 
 
 
 
846
 
847
  if _pm is not None:
848
  _pl = getattr(self, '_perf_lock', None)
 
852
  else:
853
  _pm["sam_image_total_ms"] += _d
854
 
 
 
 
 
 
 
 
 
855
  mask_dict.add_new_frame_annotation(
856
+ mask_list=masks,
857
  box_list=input_boxes.clone() if torch.is_tensor(input_boxes) else torch.tensor(input_boxes),
858
  label_list=det_labels,
859
  )
 
891
  segment_output = self.propagate_segment(
892
  inference_state, start_idx, mask_dict, step,
893
  )
 
894
 
895
+ # GPU-deferred path: only materialize last frame for IoU
896
+ last_fi = segment_output.last_frame_idx()
897
+ if last_fi is not None:
898
+ last_frame_objects = segment_output.frame_to_object_dict(
899
+ last_fi, to_cpu=True,
900
+ )
901
+ all_results[last_fi] = last_frame_objects
902
+ sam2_masks = MaskDictionary()
903
+ sam2_masks.labels = copy.deepcopy(last_frame_objects)
904
+ if last_frame_objects:
905
+ first_info = next(iter(last_frame_objects.values()))
906
+ if first_info.mask is not None:
907
+ sam2_masks.mask_height = first_info.mask.shape[-2] if first_info.mask.ndim >= 2 else 0
908
+ sam2_masks.mask_width = first_info.mask.shape[-1] if first_info.mask.ndim >= 2 else 0
909
+ if on_segment_output is not None:
910
+ on_segment_output(segment_output)
911
 
912
  logging.info(
913
  "Grounded-SAM-2 tracking complete: %d frames, %d tracked objects",