Zhen Ye commited on
Commit
06e44d3
·
1 Parent(s): 9ea2cfe

Fix inference concurrency bugs and enable default first-frame GPT

Browse files
Files changed (7) hide show
  1. .gitignore +2 -1
  2. LaserPerception/LaserPerception.js +6 -2
  3. app.py +2 -2
  4. demo.html +0 -733
  5. inference.py +229 -75
  6. requirements.txt +0 -5
  7. update_radar.py +0 -189
.gitignore CHANGED
@@ -7,4 +7,5 @@ __pycache__/
7
  .env
8
  *.mdcheckpoints/
9
  checkpoints/
10
- *.md
 
 
7
  .env
8
  *.mdcheckpoints/
9
  checkpoints/
10
+ *.md
11
+ .agent
LaserPerception/LaserPerception.js CHANGED
@@ -2116,8 +2116,12 @@
2116
  reqP_kW: null,
2117
  maxP_kW: null,
2118
  pkill: null,
2119
- // Depth visualization only, GPT handles distance
2120
- depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null
 
 
 
 
2121
  };
2122
  });
2123
 
 
2116
  reqP_kW: null,
2117
  maxP_kW: null,
2118
  pkill: null,
2119
+ // Depth visualization only
2120
+ depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
2121
+ // Bind GPT reasoning fields from backend
2122
+ gpt_distance_m: d.gpt_distance_m || null,
2123
+ gpt_direction: d.gpt_direction || null,
2124
+ gpt_description: d.gpt_description || null
2125
  };
2126
  });
2127
 
app.py CHANGED
@@ -171,7 +171,7 @@ async def detect_endpoint(
171
  detector: str = Form("hf_yolov8"),
172
  segmenter: str = Form("sam3"),
173
  enable_depth: bool = Form(False),
174
- enable_gpt: bool = Form(False),
175
  ):
176
  """
177
  Main detection endpoint.
@@ -315,7 +315,7 @@ async def detect_async_endpoint(
315
  depth_estimator: str = Form("depth"),
316
  depth_scale: float = Form(25.0),
317
  enable_depth: bool = Form(False),
318
- enable_gpt: bool = Form(False),
319
  ):
320
  if mode not in VALID_MODES:
321
  raise HTTPException(
 
171
  detector: str = Form("hf_yolov8"),
172
  segmenter: str = Form("sam3"),
173
  enable_depth: bool = Form(False),
174
+ enable_gpt: bool = Form(True),
175
  ):
176
  """
177
  Main detection endpoint.
 
315
  depth_estimator: str = Form("depth"),
316
  depth_scale: float = Form(25.0),
317
  enable_depth: bool = Form(False),
318
+ enable_gpt: bool = Form(True),
319
  ):
320
  if mode not in VALID_MODES:
321
  raise HTTPException(
demo.html DELETED
@@ -1,733 +0,0 @@
1
- <!DOCTYPE html>
2
-
3
- <html lang="en">
4
- <head>
5
- <meta charset="UTF-8">
6
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
- <title>Perception System</title>
8
- <style>
9
- * {
10
- margin: 0;
11
- padding: 0;
12
- box-sizing: border-box;
13
- }
14
-
15
- body {
16
- font-family: "IBM Plex Sans", "Avenir Next", "Helvetica Neue", sans-serif;
17
- background: linear-gradient(180deg, #f6f7f9 0%, #eef1f4 100%);
18
- color: #1f2933;
19
- min-height: 100vh;
20
- padding: 20px;
21
- }
22
-
23
- .container {
24
- max-width: 1200px;
25
- margin: 0 auto;
26
- }
27
-
28
- h1 {
29
- color: #1f2933;
30
- text-align: center;
31
- margin-bottom: 30px;
32
- font-size: 2.5rem;
33
- letter-spacing: 0.5px;
34
- }
35
-
36
- .main-card {
37
- background: #ffffff;
38
- border-radius: 16px;
39
- box-shadow: 0 18px 40px rgba(16, 24, 40, 0.12);
40
- padding: 40px;
41
- }
42
-
43
- .section {
44
- margin-bottom: 30px;
45
- }
46
-
47
- .section-title {
48
- font-size: 1.2rem;
49
- font-weight: 600;
50
- color: #333;
51
- margin-bottom: 15px;
52
- }
53
-
54
- /* Mode selector */
55
- .mode-selector {
56
- display: grid;
57
- grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
58
- gap: 15px;
59
- }
60
-
61
- .mode-card {
62
- position: relative;
63
- padding: 20px;
64
- border: 1px solid #d6dbe0;
65
- border-radius: 12px;
66
- cursor: pointer;
67
- transition: all 0.3s ease;
68
- text-align: center;
69
- background: #f9fafb;
70
- }
71
-
72
- .mode-card:hover {
73
- border-color: #4b5563;
74
- transform: translateY(-2px);
75
- box-shadow: 0 6px 16px rgba(16, 24, 40, 0.12);
76
- }
77
-
78
- .mode-card.selected {
79
- border-color: #1f2933;
80
- background: #eef2f6;
81
- }
82
-
83
- .mode-card.disabled {
84
- opacity: 0.5;
85
- cursor: not-allowed;
86
- }
87
-
88
- .mode-card input[type="radio"] {
89
- position: absolute;
90
- opacity: 0;
91
- }
92
-
93
- .mode-icon {
94
- display: none;
95
- }
96
-
97
- .mode-title {
98
- font-weight: 600;
99
- color: #333;
100
- margin-bottom: 5px;
101
- }
102
-
103
- .mode-badge {
104
- display: inline-block;
105
- padding: 4px 8px;
106
- background: #6b7280;
107
- color: #f9fafb;
108
- font-size: 0.7rem;
109
- border-radius: 4px;
110
- font-weight: 600;
111
- margin-top: 8px;
112
- }
113
-
114
- /* Input fields */
115
- .input-group {
116
- margin-bottom: 20px;
117
- }
118
-
119
- .input-group label {
120
- display: block;
121
- font-weight: 500;
122
- color: #555;
123
- margin-bottom: 8px;
124
- }
125
-
126
- .input-group input[type="text"],
127
- .input-group select {
128
- width: 100%;
129
- padding: 12px;
130
- border: 1px solid #d6dbe0;
131
- border-radius: 8px;
132
- font-size: 1rem;
133
- transition: border-color 0.3s;
134
- background: #ffffff;
135
- }
136
-
137
- .input-group input[type="text"]:focus,
138
- .input-group select:focus {
139
- outline: none;
140
- border-color: #4b5563;
141
- }
142
-
143
- .file-input-wrapper {
144
- position: relative;
145
- display: inline-block;
146
- width: 100%;
147
- }
148
-
149
- .file-input-label {
150
- display: block;
151
- padding: 15px;
152
- background: #f3f4f6;
153
- border: 1px dashed #bfc5cc;
154
- border-radius: 8px;
155
- text-align: center;
156
- cursor: pointer;
157
- transition: all 0.3s;
158
- }
159
-
160
- .file-input-label:hover {
161
- border-color: #4b5563;
162
- background: #eceff3;
163
- }
164
-
165
- .file-input-label.has-file {
166
- border-color: #1f2933;
167
- background: #e8edf2;
168
- }
169
-
170
- input[type="file"] {
171
- position: absolute;
172
- opacity: 0;
173
- width: 0;
174
- height: 0;
175
- }
176
-
177
- /* Buttons */
178
- .btn {
179
- padding: 14px 28px;
180
- font-size: 1rem;
181
- font-weight: 600;
182
- border: none;
183
- border-radius: 8px;
184
- cursor: pointer;
185
- transition: all 0.3s;
186
- width: 100%;
187
- }
188
-
189
- .btn-primary {
190
- background: #1f2933;
191
- color: #f9fafb;
192
- }
193
-
194
- .btn-primary:hover:not(:disabled) {
195
- transform: translateY(-2px);
196
- box-shadow: 0 6px 16px rgba(16, 24, 40, 0.2);
197
- }
198
-
199
- .btn:disabled {
200
- opacity: 0.5;
201
- cursor: not-allowed;
202
- }
203
-
204
- /* Results */
205
- .results-grid {
206
- display: grid;
207
- grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
208
- gap: 20px;
209
- }
210
-
211
- .video-card {
212
- border: 1px solid #e0e0e0;
213
- border-radius: 8px;
214
- overflow: hidden;
215
- }
216
-
217
- .video-card-header {
218
- background: #f8f9fa;
219
- padding: 12px 16px;
220
- font-weight: 600;
221
- color: #333;
222
- }
223
-
224
- .video-card-body {
225
- padding: 16px;
226
- }
227
-
228
- video {
229
- width: 100%;
230
- border-radius: 8px;
231
- background: #000;
232
- }
233
-
234
- .frame-preview {
235
- width: 100%;
236
- border-radius: 8px;
237
- background: #f3f4f6;
238
- display: block;
239
- }
240
-
241
- .frame-placeholder {
242
- width: 100%;
243
- border-radius: 8px;
244
- background: #f3f4f6;
245
- color: #6b7280;
246
- display: flex;
247
- align-items: center;
248
- justify-content: center;
249
- min-height: 200px;
250
- font-size: 0.95rem;
251
- text-align: center;
252
- padding: 16px;
253
- }
254
-
255
- .download-btn {
256
- margin-top: 12px;
257
- padding: 10px 16px;
258
- background: #374151;
259
- color: #f9fafb;
260
- text-decoration: none;
261
- border-radius: 6px;
262
- display: inline-block;
263
- font-size: 0.9rem;
264
- }
265
-
266
- .download-btn:hover {
267
- background: #1f2933;
268
- }
269
-
270
- /* Loading spinner */
271
- .loading {
272
- display: none;
273
- text-align: center;
274
- padding: 20px;
275
- }
276
-
277
- .loading.show {
278
- display: block;
279
- }
280
-
281
- .status-line {
282
- margin-top: 12px;
283
- font-size: 0.95rem;
284
- color: #4b5563;
285
- text-align: center;
286
- }
287
-
288
- .spinner {
289
- border: 4px solid #e5e7eb;
290
- border-top: 4px solid #1f2933;
291
- border-radius: 50%;
292
- width: 40px;
293
- height: 40px;
294
- animation: spin 1s linear infinite;
295
- margin: 0 auto 10px;
296
- }
297
-
298
- @keyframes spin {
299
- 0% { transform: rotate(0deg); }
300
- 100% { transform: rotate(360deg); }
301
- }
302
-
303
- /* View toggle buttons */
304
- .view-toggle-btn {
305
- padding: 12px 28px;
306
- margin: 0 10px;
307
- background: #e5e7eb;
308
- color: #374151;
309
- border: 2px solid #d1d5db;
310
- border-radius: 8px;
311
- cursor: pointer;
312
- font-weight: 600;
313
- font-size: 14px;
314
- transition: all 0.3s;
315
- }
316
-
317
- .view-toggle-btn.active {
318
- background: #1f2933;
319
- color: #f9fafb;
320
- border-color: #1f2933;
321
- }
322
-
323
- .view-toggle-btn:hover:not(.active) {
324
- background: #d1d5db;
325
- transform: translateY(-1px);
326
- }
327
-
328
- .hidden {
329
- display: none;
330
- }
331
-
332
- </style>
333
- </head>
334
- <body>
335
- <div class="container">
336
- <h1>Perception System</h1>
337
-
338
- <div class="main-card">
339
- <!-- Mode Selection -->
340
- <div class="section">
341
- <div class="section-title">1. Select Detection Mode</div>
342
- <div class="mode-selector">
343
- <label class="mode-card selected">
344
- <input type="radio" name="mode" value="object_detection" checked>
345
- <div class="mode-title">Object Detection</div>
346
- </label>
347
-
348
- <label class="mode-card">
349
- <input type="radio" name="mode" value="segmentation">
350
- <div class="mode-title">Segmentation</div>
351
- </label>
352
-
353
- <label class="mode-card">
354
- <input type="radio" name="mode" value="drone_detection">
355
- <div class="mode-title">Drone Detection</div>
356
- </label>
357
- </div>
358
- </div>
359
-
360
- <!-- Text Prompts Input (for all modes) -->
361
- <div class="section" id="queriesSection">
362
- <div class="input-group">
363
- <label for="queries" id="queriesLabel">Text Prompts (comma-separated)</label>
364
- <input
365
- type="text"
366
- id="queries"
367
- placeholder="person, car, dog, bicycle"
368
- >
369
- <small id="queriesHint" style="color: #666; display: block; margin-top: 5px;">
370
- Enter objects to detect or segment
371
- </small>
372
- </div>
373
- </div>
374
-
375
- <!-- Detector Selection -->
376
- <div class="section" id="detectorSection">
377
- <div class="input-group">
378
- <label for="detector">2. Select Detection Model</label>
379
- <select id="detector">
380
- <option value="hf_yolov8">YOLOv8 (Fast, COCO classes)</option>
381
- <option value="detr_resnet50">DETR ResNet-50 (Transformer-based)</option>
382
- <option value="grounding_dino">Grounding DINO (Open-vocabulary)</option>
383
- </select>
384
- </div>
385
- </div>
386
-
387
- <!-- Segmenter Selection -->
388
- <div class="section hidden" id="segmenterSection">
389
- <div class="input-group">
390
- <label for="segmenter">2. Select Segmentation Model</label>
391
- <select id="segmenter">
392
- <option value="sam3">SAM3 (Segment Anything Model 3)</option>
393
- </select>
394
- </div>
395
- </div>
396
-
397
- <!-- Drone Model Selection -->
398
- <div class="section hidden" id="droneModelSection">
399
- <div class="input-group">
400
- <label for="droneModel">2. Select Drone Model</label>
401
- <select id="droneModel" disabled>
402
- <option value="drone_yolo">Drone YOLO (HF pretrained)</option>
403
- </select>
404
- </div>
405
- </div>
406
-
407
- <!-- Depth Model Selection -->
408
- <div class="section" id="depthModelSection">
409
- <div class="input-group">
410
- <label for="depthModel">3. Select Depth Model</label>
411
- <select id="depthModel">
412
- <option value="depth">Depth</option>
413
- </select>
414
- </div>
415
- </div>
416
-
417
- <!-- Video Upload -->
418
- <div class="section">
419
- <div class="input-group">
420
- <label>4. Upload Video</label>
421
- <div class="file-input-wrapper">
422
- <label class="file-input-label" id="fileLabel" for="videoFile">
423
- Click to select video file (MP4)
424
- </label>
425
- <input type="file" id="videoFile" accept="video/*">
426
- </div>
427
- </div>
428
- </div>
429
-
430
- <!-- Process Button -->
431
- <div class="section">
432
- <button class="btn btn-primary" id="processBtn" disabled>
433
- Process Video
434
- </button>
435
- </div>
436
-
437
- <!-- Loading -->
438
- <div class="loading" id="loading">
439
- <div class="spinner"></div>
440
- <p>Processing video... This may take a while depending on video length.</p>
441
- </div>
442
- <p class="status-line hidden" id="statusLine"></p>
443
-
444
- <!-- Results -->
445
- <div class="section hidden" id="resultsSection">
446
- <div class="section-title">Results</div>
447
-
448
- <!-- View Toggle Buttons -->
449
- <div id="viewToggleContainer" class="hidden" style="text-align: center; margin-bottom: 20px;">
450
- <button class="view-toggle-btn active" id="detectionViewBtn">Detection View</button>
451
- <button class="view-toggle-btn" id="depthViewBtn">Depth View</button>
452
- </div>
453
-
454
- <div class="results-grid">
455
- <div class="video-card">
456
- <div class="video-card-header">First Frame</div>
457
- <div class="video-card-body">
458
- <img id="firstFrameImage" class="frame-preview" alt="First frame preview">
459
- </div>
460
- </div>
461
- <div class="video-card">
462
- <div class="video-card-header">Original Video</div>
463
- <div class="video-card-body">
464
- <video id="originalVideo" controls></video>
465
- </div>
466
- </div>
467
- <div class="video-card">
468
- <div class="video-card-header">Processed Video</div>
469
- <div class="video-card-body">
470
- <video id="processedVideo" controls autoplay loop></video>
471
- <a id="downloadBtn" class="download-btn" download="processed.mp4">
472
- Download Processed Video
473
- </a>
474
- </div>
475
- </div>
476
- </div>
477
- </div>
478
- </div>
479
- </div>
480
-
481
- <script>
482
- // State
483
- let selectedMode = 'object_detection';
484
- let videoFile = null;
485
- let currentView = 'detection'; // 'detection' or 'depth'
486
- let detectionVideoUrl = null;
487
- let depthVideoUrl = null;
488
- let detectionFirstFrameUrl = null;
489
-
490
- // Elements
491
- const modeCards = document.querySelectorAll('.mode-card');
492
- const queriesSection = document.getElementById('queriesSection');
493
- const queriesLabel = document.getElementById('queriesLabel');
494
- const queriesHint = document.getElementById('queriesHint');
495
- const detectorSection = document.getElementById('detectorSection');
496
- const segmenterSection = document.getElementById('segmenterSection');
497
- const droneModelSection = document.getElementById('droneModelSection');
498
- const fileInput = document.getElementById('videoFile');
499
- const fileLabel = document.getElementById('fileLabel');
500
- const processBtn = document.getElementById('processBtn');
501
- const loading = document.getElementById('loading');
502
- const resultsSection = document.getElementById('resultsSection');
503
- const originalVideo = document.getElementById('originalVideo');
504
- const processedVideo = document.getElementById('processedVideo');
505
- const firstFrameImage = document.getElementById('firstFrameImage');
506
- const downloadBtn = document.getElementById('downloadBtn');
507
- const viewToggleContainer = document.getElementById('viewToggleContainer');
508
- const detectionViewBtn = document.getElementById('detectionViewBtn');
509
- const depthViewBtn = document.getElementById('depthViewBtn');
510
- let statusPoller = null;
511
- const statusLine = document.getElementById('statusLine');
512
-
513
- // View switching function
514
- function switchToView(view) {
515
- currentView = view;
516
-
517
- if (view === 'detection') {
518
- detectionViewBtn.classList.add('active');
519
- depthViewBtn.classList.remove('active');
520
-
521
- if (detectionFirstFrameUrl) {
522
- firstFrameImage.src = detectionFirstFrameUrl;
523
- }
524
- if (detectionVideoUrl) {
525
- processedVideo.src = detectionVideoUrl;
526
- downloadBtn.href = detectionVideoUrl;
527
- downloadBtn.download = 'processed_detection.mp4';
528
- processedVideo.load();
529
- }
530
- } else {
531
- depthViewBtn.classList.add('active');
532
- detectionViewBtn.classList.remove('active');
533
-
534
- if (depthVideoUrl) {
535
- processedVideo.src = depthVideoUrl;
536
- downloadBtn.href = depthVideoUrl;
537
- downloadBtn.download = 'depth_map.mp4';
538
- processedVideo.load();
539
- }
540
- }
541
- }
542
-
543
- // Toggle button event listeners
544
- if (detectionViewBtn) {
545
- detectionViewBtn.addEventListener('click', () => switchToView('detection'));
546
- }
547
- if (depthViewBtn) {
548
- depthViewBtn.addEventListener('click', () => switchToView('depth'));
549
- }
550
- // Mode selection handler
551
- modeCards.forEach(card => {
552
- card.addEventListener('click', (e) => {
553
- const input = card.querySelector('input[type="radio"]');
554
- const mode = input.value;
555
-
556
- // Update selected state
557
- modeCards.forEach(c => c.classList.remove('selected'));
558
- card.classList.add('selected');
559
- selectedMode = mode;
560
-
561
- // Update query label and hint based on mode
562
- if (mode === 'object_detection') {
563
- queriesLabel.textContent = 'Objects to Detect (comma-separated)';
564
- queriesHint.textContent = 'Example: person, car, dog, bicycle';
565
- detectorSection.classList.remove('hidden');
566
- segmenterSection.classList.add('hidden');
567
- droneModelSection.classList.add('hidden');
568
- } else if (mode === 'segmentation') {
569
- queriesLabel.textContent = 'Objects to Segment (comma-separated)';
570
- queriesHint.textContent = 'Example: person, car, building, tree';
571
- detectorSection.classList.add('hidden');
572
- segmenterSection.classList.remove('hidden');
573
- droneModelSection.classList.add('hidden');
574
- } else if (mode === 'drone_detection') {
575
- queriesLabel.textContent = 'Optional Labels (comma-separated)';
576
- queriesHint.textContent = 'Example: drone, quadcopter';
577
- detectorSection.classList.add('hidden');
578
- segmenterSection.classList.add('hidden');
579
- droneModelSection.classList.remove('hidden');
580
- }
581
-
582
- // Always show queries section
583
- queriesSection.classList.remove('hidden');
584
- });
585
- });
586
-
587
- // File input handler
588
- fileInput.addEventListener('change', (e) => {
589
- videoFile = e.target.files[0];
590
- if (videoFile) {
591
- fileLabel.textContent = `✅ ${videoFile.name}`;
592
- fileLabel.classList.add('has-file');
593
- processBtn.disabled = false;
594
-
595
- // Preview original video
596
- originalVideo.src = URL.createObjectURL(videoFile);
597
- }
598
- });
599
-
600
- // Process button handler
601
- processBtn.addEventListener('click', async () => {
602
- if (!videoFile) {
603
- alert('Please select a video file first.');
604
- return;
605
- }
606
-
607
- // Show loading
608
- processBtn.disabled = true;
609
- loading.classList.add('show');
610
- resultsSection.classList.add('hidden');
611
- if (statusPoller) {
612
- clearInterval(statusPoller);
613
- statusPoller = null;
614
- }
615
- firstFrameImage.removeAttribute('src');
616
- processedVideo.removeAttribute('src');
617
- processedVideo.load();
618
- downloadBtn.removeAttribute('href');
619
- viewToggleContainer.classList.add('hidden');
620
- currentView = 'detection';
621
- detectionVideoUrl = null;
622
- depthVideoUrl = null;
623
- detectionFirstFrameUrl = null;
624
- statusLine.classList.add('hidden');
625
- statusLine.textContent = '';
626
-
627
- // Prepare form data
628
- const formData = new FormData();
629
- formData.append('video', videoFile);
630
- formData.append('mode', selectedMode);
631
- formData.append('queries', document.getElementById('queries').value);
632
- formData.append('detector', document.getElementById('detector').value);
633
- formData.append('segmenter', document.getElementById('segmenter').value);
634
- formData.append('depth_estimator', document.getElementById('depthModel').value);
635
-
636
- try {
637
- const response = await fetch('/detect/async', {
638
- method: 'POST',
639
- body: formData
640
- });
641
-
642
- if (!response.ok) {
643
- const error = await response.json();
644
- alert(`Error: ${error.detail || error.error || 'Processing failed'}`);
645
- return;
646
- }
647
-
648
- const data = await response.json();
649
- firstFrameImage.src = `${data.first_frame_url}?t=${Date.now()}`;
650
- resultsSection.classList.remove('hidden');
651
- statusLine.textContent = 'Status: processing';
652
- statusLine.classList.remove('hidden');
653
-
654
- statusPoller = setInterval(async () => {
655
- try {
656
- const statusResponse = await fetch(data.status_url);
657
- if (!statusResponse.ok) {
658
- clearInterval(statusPoller);
659
- statusPoller = null;
660
- statusLine.textContent = 'Status: expired (please re-upload)';
661
- alert('Job expired. Please re-upload the video.');
662
- return;
663
- }
664
- const statusData = await statusResponse.json();
665
- if (statusData.status === 'completed') {
666
- clearInterval(statusPoller);
667
- statusPoller = null;
668
- statusLine.textContent = 'Status: completed';
669
-
670
- // Fetch detection video
671
- const videoResponse = await fetch(data.video_url);
672
- if (!videoResponse.ok) {
673
- alert('Failed to fetch processed video.');
674
- return;
675
- }
676
- const blob = await videoResponse.blob();
677
- detectionVideoUrl = URL.createObjectURL(blob);
678
- detectionFirstFrameUrl = `${data.first_frame_url}?t=${Date.now()}`;
679
-
680
- // Set initial detection view
681
- processedVideo.src = detectionVideoUrl;
682
- downloadBtn.href = detectionVideoUrl;
683
-
684
- // Load depth assets
685
- await loadDepthAssets(data);
686
- } else if (statusData.status === 'failed') {
687
- clearInterval(statusPoller);
688
- statusPoller = null;
689
- statusLine.textContent = 'Status: failed';
690
- alert(statusData.error || 'Processing failed.');
691
- } else if (statusData.status) {
692
- statusLine.textContent = `Status: ${statusData.status}`;
693
- }
694
- } catch (pollError) {
695
- clearInterval(statusPoller);
696
- statusPoller = null;
697
- console.error('Polling error:', pollError);
698
- statusLine.textContent = 'Status: polling error';
699
- alert('Polling error: ' + pollError.message);
700
- }
701
- }, 10000);
702
- } catch (error) {
703
- console.error('Error:', error);
704
- alert('Network error: ' + error.message);
705
- } finally {
706
- loading.classList.remove('show');
707
- processBtn.disabled = false;
708
- }
709
- });
710
-
711
- async function loadDepthAssets(jobData) {
712
- if (!jobData.depth_video_url) {
713
- return;
714
- }
715
-
716
- try {
717
- const depthResponse = await fetch(jobData.depth_video_url);
718
- if (depthResponse.ok) {
719
- const depthBlob = await depthResponse.blob();
720
- depthVideoUrl = URL.createObjectURL(depthBlob);
721
-
722
- // Show toggle buttons now that we have both videos
723
- viewToggleContainer.classList.remove('hidden');
724
-
725
- // Start with detection view
726
- switchToView('detection');
727
- }
728
- } catch (error) {}
729
- }
730
-
731
- </script>
732
- </body>
733
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
inference.py CHANGED
@@ -25,6 +25,94 @@ from utils.gpt_distance import estimate_distance_gpt
25
  import tempfile
26
 
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  def _check_cancellation(job_id: Optional[str]) -> None:
29
  """Check if job has been cancelled and raise exception if so."""
30
  if job_id is None:
@@ -521,7 +609,7 @@ def process_first_frame(
521
  depth_estimator_name: Optional[str] = None,
522
  depth_scale: Optional[float] = None,
523
  enable_depth_estimator: bool = False,
524
- enable_gpt: bool = False,
525
  ) -> Tuple[np.ndarray, List[Dict[str, Any]]]:
526
  frame, _, _, _ = extract_first_frame(video_path)
527
  if mode == "segmentation":
@@ -715,7 +803,12 @@ def run_inference(
715
  queue_out_max = max(64, (len(detectors) if detectors else 1) * 32)
716
  queue_out = Queue(maxsize=queue_out_max)
717
 
 
718
  # 6. Worker Function (Unified)
 
 
 
 
719
  def worker_task(gpu_idx: int):
720
  detector_instance = detectors[gpu_idx]
721
  depth_instance = depth_estimators[gpu_idx] if gpu_idx < len(depth_estimators) else None # Handle mismatched lists safely
@@ -734,7 +827,7 @@ def run_inference(
734
  try:
735
  if detector_instance.supports_batch:
736
  with detector_instance.lock:
737
- det_results = detector_instance.predict_batch(frames, queries) # Assuming predict_batch takes queries
738
  else:
739
  with detector_instance.lock:
740
  det_results = [detector_instance.predict(f, queries) for f in frames]
@@ -767,15 +860,13 @@ def run_inference(
767
  processed = frame.copy()
768
 
769
  # A. Render Depth Heatmap (if enabled)
770
- # Overwrites original frame visual
771
  if dep_res and dep_res.depth_map is not None:
772
  processed = colorize_depth_map(dep_res.depth_map, global_min, global_max)
773
- # Also optionally attach 'depth_rel' to detections based on this map?
774
  try:
775
  _attach_depth_from_result(detections, dep_res, depth_scale)
776
  except: pass
777
 
778
- # B. Render Boxes (on top of whatever visual we have)
779
  display_labels = [_build_display_label(d) for d in detections]
780
  if d_res:
781
  processed = draw_boxes(processed, d_res.boxes, label_names=display_labels)
@@ -786,41 +877,36 @@ def run_inference(
786
  queue_out.put((idx, processed, detections), timeout=1.0)
787
  break
788
  except Full:
 
 
 
789
  if job_id: _check_cancellation(job_id)
790
 
791
  batch_accum.clear()
792
 
793
  while True:
794
  item = queue_in.get()
795
- if item is None:
796
- flush_batch()
797
- queue_in.task_done()
798
- break
 
 
799
 
800
- frame_idx, frame_data = item
801
-
802
- if frame_idx % 30 == 0:
803
- logging.debug("Processing frame %d on device %s", frame_idx, "cpu" if num_gpus==0 else f"cuda:{gpu_idx}")
804
 
805
- try:
806
  batch_accum.append((frame_idx, frame_data))
807
  if len(batch_accum) >= batch_size:
808
  flush_batch()
809
  except Exception as e:
810
- logging.exception("Error processing batch around frame %d", frame_idx)
811
- # Fail strictly or soft?
812
- # If batch fails, we probably lost a chunk.
813
- # Put placeholders for what we have in accum
814
- for idx, frm in batch_accum:
815
- while True:
816
- try:
817
- queue_out.put((idx, frm, []), timeout=1.0)
818
- break
819
- except Full:
820
- if job_id: _check_cancellation(job_id)
821
- batch_accum.clear()
822
-
823
- queue_in.task_done()
824
 
825
  # 6. Start Workers
826
  workers = []
@@ -838,7 +924,9 @@ def run_inference(
838
 
839
  all_detections_map = {}
840
 
841
- writer_finished = False
 
 
842
 
843
  def writer_loop():
844
  nonlocal writer_finished
@@ -899,30 +987,46 @@ def run_inference(
899
  writer_thread = Thread(target=writer_loop, daemon=True)
900
  writer_thread.start()
901
 
 
902
  # 8. Feed Frames (Main Thread)
903
  try:
904
  frames_fed = 0
905
- for i, frame in enumerate(reader):
 
906
  _check_cancellation(job_id)
907
- if max_frames is not None and i >= max_frames:
908
  break
909
-
910
- queue_in.put((i, frame)) # Blocks if full
 
 
 
 
 
911
  frames_fed += 1
912
 
913
  # Signal workers to stop
914
  for _ in range(num_workers):
915
- queue_in.put(None)
916
-
 
 
 
917
  # Wait for queue to process
918
  queue_in.join()
919
 
920
  except Exception as e:
921
  logging.exception("Feeding frames failed")
 
 
 
 
 
922
  raise
923
  finally:
924
  reader.close()
925
 
 
926
  # Wait for writer
927
  writer_thread.join()
928
 
@@ -1001,6 +1105,8 @@ def run_segmentation(
1001
  queue_in = Queue(maxsize=16)
1002
  queue_out = Queue(maxsize=max(32, len(segmenters)*4))
1003
 
 
 
1004
  def worker_seg(gpu_idx: int):
1005
  seg = segmenters[gpu_idx]
1006
  batch_size = seg.max_batch_size if seg.supports_batch else 1
@@ -1033,6 +1139,8 @@ def run_segmentation(
1033
  queue_out.put((idx, processed), timeout=1.0)
1034
  break
1035
  except Full:
 
 
1036
  if job_id: _check_cancellation(job_id)
1037
 
1038
  except Exception as e:
@@ -1043,25 +1151,26 @@ def run_segmentation(
1043
  queue_out.put((idx, frm), timeout=1.0) # Fallback
1044
  break
1045
  except Full:
 
1046
  if job_id: _check_cancellation(job_id)
1047
  batch_accum.clear()
1048
 
1049
  while True:
1050
  item = queue_in.get()
1051
- if item is None:
1052
- flush_batch()
1053
- queue_in.task_done()
1054
- break
1055
-
1056
- idx, frame = item
1057
- batch_accum.append(item)
1058
- if idx % 30 == 0:
1059
- logging.debug("Seg frame %d (GPU %d)", idx, gpu_idx)
1060
-
1061
- if len(batch_accum) >= batch_size:
1062
- flush_batch()
1063
 
1064
- queue_in.task_done()
 
 
 
1065
 
1066
  workers = []
1067
  for i in range(len(segmenters)):
@@ -1070,7 +1179,13 @@ def run_segmentation(
1070
  workers.append(t)
1071
 
1072
  # Writer
1073
- writer_finished = False
 
 
 
 
 
 
1074
 
1075
  def writer_loop():
1076
  nonlocal writer_finished
@@ -1112,17 +1227,33 @@ def run_segmentation(
1112
 
1113
  # Feeder
1114
  try:
1115
- # reader = VideoReader(input_video_path) # Reusing existing reader
1116
- for i, frame in enumerate(reader):
 
1117
  _check_cancellation(job_id)
1118
- if max_frames is not None and i >= max_frames:
 
 
 
 
 
1119
  break
1120
- queue_in.put((i, frame))
 
 
1121
 
1122
  for _ in workers:
1123
- queue_in.put(None)
 
 
1124
  queue_in.join()
1125
 
 
 
 
 
 
 
1126
  finally:
1127
  reader.close()
1128
 
@@ -1292,6 +1423,8 @@ def run_depth_inference(
1292
  queue_out_max = max(32, (len(estimators) if estimators else 1) * 4)
1293
  queue_out = Queue(maxsize=queue_out_max)
1294
 
 
 
1295
  def worker_depth(gpu_idx: int):
1296
  est = estimators[gpu_idx]
1297
  batch_size = est.max_batch_size if est.supports_batch else 1
@@ -1320,7 +1453,6 @@ def run_depth_inference(
1320
  if detections and idx < len(detections):
1321
  frame_dets = detections[idx]
1322
  if frame_dets:
1323
- import cv2
1324
  boxes = []
1325
  labels = []
1326
  for d in frame_dets:
@@ -1336,6 +1468,8 @@ def run_depth_inference(
1336
  queue_out.put((idx, colored), timeout=1.0)
1337
  break
1338
  except Full:
 
 
1339
  if job_id: _check_cancellation(job_id)
1340
 
1341
  except Exception as e:
@@ -1346,26 +1480,27 @@ def run_depth_inference(
1346
  queue_out.put((idx, frm), timeout=1.0)
1347
  break
1348
  except Full:
 
1349
  if job_id: _check_cancellation(job_id)
1350
  batch_accum.clear()
1351
 
1352
  while True:
1353
  item = queue_in.get()
1354
- if item is None:
1355
- flush_batch()
1356
- queue_in.task_done()
1357
- break
1358
-
1359
- idx, frame = item
1360
- batch_accum.append(item)
1361
-
1362
- if idx % 30 == 0:
1363
- logging.info("Depth frame %d (GPU %d)", idx, gpu_idx)
1364
-
1365
- if len(batch_accum) >= batch_size:
1366
- flush_batch()
1367
 
1368
- queue_in.task_done()
 
 
 
1369
 
1370
  # Workers
1371
  workers = []
@@ -1375,7 +1510,9 @@ def run_depth_inference(
1375
  workers.append(t)
1376
 
1377
  # Writer
1378
- writer_finished = False
 
 
1379
  first_frame_saved = False
1380
 
1381
  def writer_loop():
@@ -1424,17 +1561,34 @@ def run_depth_inference(
1424
 
1425
  # Feeder
1426
  try:
1427
- # reader = VideoReader(input_video_path) # Reusing existing reader
1428
- for i, frame in enumerate(reader):
 
1429
  _check_cancellation(job_id)
1430
- if max_frames is not None and i >= max_frames:
 
 
 
 
 
1431
  break
1432
- queue_in.put((i, frame))
 
 
1433
 
1434
  for _ in workers:
1435
- queue_in.put(None)
 
 
1436
  queue_in.join()
1437
 
 
 
 
 
 
 
 
1438
  finally:
1439
  reader.close()
1440
 
 
25
  import tempfile
26
 
27
 
28
class AsyncVideoReader:
    """
    Video reader that decodes frames in a background thread.

    A bounded queue provides backpressure: the decoder thread blocks when the
    queue is full, so at most ``prefetch_size`` frames are ever buffered.
    This prevents GPU starvation on multi-GPU systems by prefetching frames
    while the main thread is busy dispatching work to GPUs.
    """

    def __init__(self, video_path: str, prefetch_size: int = 32):
        """
        Open the video and start the background decoder thread.

        Args:
            video_path: Path to video file.
            prefetch_size: Number of frames to prefetch (default 32).

        Raises:
            ValueError: If the video cannot be opened.
        """
        from queue import Queue
        from threading import Thread

        self.video_path = video_path
        self.prefetch_size = prefetch_size

        # Open video to get metadata. Some containers report fps as 0,
        # hence the 30.0 fallback.
        self._cap = cv2.VideoCapture(video_path)
        if not self._cap.isOpened():
            raise ValueError(f"Unable to open video: {video_path}")

        self.fps = self._cap.get(cv2.CAP_PROP_FPS) or 30.0
        self.width = int(self._cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.height = int(self._cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.total_frames = int(self._cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Prefetch queue (bounded => backpressure on the decoder thread).
        self._queue: Queue = Queue(maxsize=prefetch_size)
        # Last decode error, surfaced to the consumer in __next__.
        self._error: Optional[Exception] = None
        self._finished = False

        # Start decoder thread.
        self._thread = Thread(target=self._decode_loop, daemon=True)
        self._thread.start()

    def _decode_loop(self):
        """Background thread: decode frames until EOF, error, or close()."""
        try:
            while True:
                success, frame = self._cap.read()
                if not success:
                    break
                self._queue.put(frame)  # Blocks when queue is full (backpressure)
        except Exception as e:
            self._error = e
            # Lazy %-formatting: only rendered if the record is emitted.
            logging.error("AsyncVideoReader decode error: %s", e)
        finally:
            self._cap.release()
            self._queue.put(None)  # Sentinel to signal end
            self._finished = True

    def __iter__(self):
        return self

    def __next__(self) -> np.ndarray:
        # Surface a decode error to the consumer instead of silently stopping.
        if self._error:
            raise self._error

        frame = self._queue.get()
        if frame is None:
            raise StopIteration
        return frame

    def close(self):
        """Stop the decoder thread and release resources (idempotent)."""
        from queue import Empty

        # Releasing the capture makes the next read() in the decoder thread
        # fail, which terminates its loop.
        # NOTE(review): release() races with a concurrent read() in the
        # decoder thread; OpenCV appears to tolerate this — confirm.
        if self._cap.isOpened():
            self._cap.release()
        # Keep draining until the thread actually exits: it may be blocked on
        # put() with a full queue, and a single drain pass could miss frames
        # it enqueues afterwards (including the final None sentinel). Each
        # freed slot lets it progress to the failed read and terminate.
        while self._thread.is_alive():
            try:
                self._queue.get_nowait()
            except Empty:
                pass
            self._thread.join(timeout=0.05)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
115
+
116
  def _check_cancellation(job_id: Optional[str]) -> None:
117
  """Check if job has been cancelled and raise exception if so."""
118
  if job_id is None:
 
609
  depth_estimator_name: Optional[str] = None,
610
  depth_scale: Optional[float] = None,
611
  enable_depth_estimator: bool = False,
612
+ enable_gpt: bool = True, # ENABLED BY DEFAULT
613
  ) -> Tuple[np.ndarray, List[Dict[str, Any]]]:
614
  frame, _, _, _ = extract_first_frame(video_path)
615
  if mode == "segmentation":
 
803
  queue_out_max = max(64, (len(detectors) if detectors else 1) * 32)
804
  queue_out = Queue(maxsize=queue_out_max)
805
 
806
+
807
  # 6. Worker Function (Unified)
808
+
809
+ # Robustness: Define flag early so workers can see it
810
+ writer_finished = False
811
+
812
  def worker_task(gpu_idx: int):
813
  detector_instance = detectors[gpu_idx]
814
  depth_instance = depth_estimators[gpu_idx] if gpu_idx < len(depth_estimators) else None # Handle mismatched lists safely
 
827
  try:
828
  if detector_instance.supports_batch:
829
  with detector_instance.lock:
830
+ det_results = detector_instance.predict_batch(frames, queries)
831
  else:
832
  with detector_instance.lock:
833
  det_results = [detector_instance.predict(f, queries) for f in frames]
 
860
  processed = frame.copy()
861
 
862
  # A. Render Depth Heatmap (if enabled)
 
863
  if dep_res and dep_res.depth_map is not None:
864
  processed = colorize_depth_map(dep_res.depth_map, global_min, global_max)
 
865
  try:
866
  _attach_depth_from_result(detections, dep_res, depth_scale)
867
  except: pass
868
 
869
+ # B. Render Boxes
870
  display_labels = [_build_display_label(d) for d in detections]
871
  if d_res:
872
  processed = draw_boxes(processed, d_res.boxes, label_names=display_labels)
 
877
  queue_out.put((idx, processed, detections), timeout=1.0)
878
  break
879
  except Full:
880
+ # Robustness: Check if writer is dead
881
+ if writer_finished:
882
+ raise RuntimeError("Writer thread died unexpectedly")
883
  if job_id: _check_cancellation(job_id)
884
 
885
  batch_accum.clear()
886
 
887
  while True:
888
  item = queue_in.get()
889
+ try:
890
+ if item is None:
891
+ flush_batch()
892
+ break
893
+
894
+ frame_idx, frame_data = item
895
 
896
+ if frame_idx % 30 == 0:
897
+ logging.debug("Processing frame %d on device %s", frame_idx, "cpu" if num_gpus==0 else f"cuda:{gpu_idx}")
 
 
898
 
 
899
  batch_accum.append((frame_idx, frame_data))
900
  if len(batch_accum) >= batch_size:
901
  flush_batch()
902
  except Exception as e:
903
+ logging.exception("Worker failed processing frame")
904
+ # Important: If we lose a batch, the pipeline might stall waiting for those indices.
905
+ # Ideally we should emit error placeholders?
906
+ # For now, just ensure we don't hold the lock.
907
+ raise
908
+ finally:
909
+ queue_in.task_done()
 
 
 
 
 
 
 
910
 
911
  # 6. Start Workers
912
  workers = []
 
924
 
925
  all_detections_map = {}
926
 
927
+ # writer_finished initialized earlier
928
+ # writer_finished = False
929
+
930
 
931
  def writer_loop():
932
  nonlocal writer_finished
 
987
  writer_thread = Thread(target=writer_loop, daemon=True)
988
  writer_thread.start()
989
 
990
+ # 8. Feed Frames (Main Thread)
991
  # 8. Feed Frames (Main Thread)
992
  try:
993
  frames_fed = 0
994
+ reader_iter = iter(reader)
995
+ while True:
996
  _check_cancellation(job_id)
997
+ if max_frames is not None and frames_fed >= max_frames:
998
  break
999
+
1000
+ try:
1001
+ frame = next(reader_iter)
1002
+ except StopIteration:
1003
+ break
1004
+
1005
+ queue_in.put((frames_fed, frame)) # Blocks if full
1006
  frames_fed += 1
1007
 
1008
  # Signal workers to stop
1009
  for _ in range(num_workers):
1010
+ try:
1011
+ queue_in.put(None, timeout=5.0) # Using timeout to prevent infinite block
1012
+ except Full:
1013
+ logging.warning("Failed to send stop signal to a worker (queue full)")
1014
+
1015
  # Wait for queue to process
1016
  queue_in.join()
1017
 
1018
  except Exception as e:
1019
  logging.exception("Feeding frames failed")
1020
+ # Ensure we try to signal workers even on error
1021
+ for _ in range(num_workers):
1022
+ try:
1023
+ queue_in.put_nowait(None)
1024
+ except Full: pass
1025
  raise
1026
  finally:
1027
  reader.close()
1028
 
1029
+
1030
  # Wait for writer
1031
  writer_thread.join()
1032
 
 
1105
  queue_in = Queue(maxsize=16)
1106
  queue_out = Queue(maxsize=max(32, len(segmenters)*4))
1107
 
1108
+ writer_finished = False # Robustness
1109
+
1110
  def worker_seg(gpu_idx: int):
1111
  seg = segmenters[gpu_idx]
1112
  batch_size = seg.max_batch_size if seg.supports_batch else 1
 
1139
  queue_out.put((idx, processed), timeout=1.0)
1140
  break
1141
  except Full:
1142
+ if writer_finished:
1143
+ raise RuntimeError("Writer thread died")
1144
  if job_id: _check_cancellation(job_id)
1145
 
1146
  except Exception as e:
 
1151
  queue_out.put((idx, frm), timeout=1.0) # Fallback
1152
  break
1153
  except Full:
1154
+ if writer_finished: raise
1155
  if job_id: _check_cancellation(job_id)
1156
  batch_accum.clear()
1157
 
1158
  while True:
1159
  item = queue_in.get()
1160
+ try:
1161
+ if item is None:
1162
+ flush_batch()
1163
+ break
1164
+
1165
+ idx, frame = item
1166
+ batch_accum.append(item)
1167
+ if idx % 30 == 0:
1168
+ logging.debug("Seg frame %d (GPU %d)", idx, gpu_idx)
 
 
 
1169
 
1170
+ if len(batch_accum) >= batch_size:
1171
+ flush_batch()
1172
+ finally:
1173
+ queue_in.task_done()
1174
 
1175
  workers = []
1176
  for i in range(len(segmenters)):
 
1179
  workers.append(t)
1180
 
1181
  # Writer
1182
+ # writer_finished moved up for closure scope match
1183
+
1184
+
1185
+ # Writer
1186
+ # Writer
1187
+ # writer_finished defined earlier
1188
+
1189
 
1190
  def writer_loop():
1191
  nonlocal writer_finished
 
1227
 
1228
  # Feeder
1229
  try:
1230
+ reader_iter = iter(reader)
1231
+ frames_fed = 0
1232
+ while True:
1233
  _check_cancellation(job_id)
1234
+ if max_frames is not None and frames_fed >= max_frames:
1235
+ break
1236
+
1237
+ try:
1238
+ frame = next(reader_iter)
1239
+ except StopIteration:
1240
  break
1241
+
1242
+ queue_in.put((frames_fed, frame))
1243
+ frames_fed += 1
1244
 
1245
  for _ in workers:
1246
+ try: queue_in.put(None, timeout=5.0)
1247
+ except Full: pass
1248
+
1249
  queue_in.join()
1250
 
1251
+ except Exception:
1252
+ logging.exception("Segmentation loop failed")
1253
+ for _ in workers:
1254
+ try: queue_in.put_nowait(None)
1255
+ except Full: pass
1256
+ raise
1257
  finally:
1258
  reader.close()
1259
 
 
1423
  queue_out_max = max(32, (len(estimators) if estimators else 1) * 4)
1424
  queue_out = Queue(maxsize=queue_out_max)
1425
 
1426
+ writer_finished = False
1427
+
1428
  def worker_depth(gpu_idx: int):
1429
  est = estimators[gpu_idx]
1430
  batch_size = est.max_batch_size if est.supports_batch else 1
 
1453
  if detections and idx < len(detections):
1454
  frame_dets = detections[idx]
1455
  if frame_dets:
 
1456
  boxes = []
1457
  labels = []
1458
  for d in frame_dets:
 
1468
  queue_out.put((idx, colored), timeout=1.0)
1469
  break
1470
  except Full:
1471
+ if writer_finished:
1472
+ raise RuntimeError("Writer died")
1473
  if job_id: _check_cancellation(job_id)
1474
 
1475
  except Exception as e:
 
1480
  queue_out.put((idx, frm), timeout=1.0)
1481
  break
1482
  except Full:
1483
+ if writer_finished: raise
1484
  if job_id: _check_cancellation(job_id)
1485
  batch_accum.clear()
1486
 
1487
  while True:
1488
  item = queue_in.get()
1489
+ try:
1490
+ if item is None:
1491
+ flush_batch()
1492
+ break
1493
+
1494
+ idx, frame = item
1495
+ batch_accum.append(item)
1496
+
1497
+ if idx % 30 == 0:
1498
+ logging.info("Depth frame %d (GPU %d)", idx, gpu_idx)
 
 
 
1499
 
1500
+ if len(batch_accum) >= batch_size:
1501
+ flush_batch()
1502
+ finally:
1503
+ queue_in.task_done()
1504
 
1505
  # Workers
1506
  workers = []
 
1510
  workers.append(t)
1511
 
1512
  # Writer
1513
+ # Writer
1514
+ # writer_finished defined earlier
1515
+
1516
  first_frame_saved = False
1517
 
1518
  def writer_loop():
 
1561
 
1562
  # Feeder
1563
  try:
1564
+ reader_iter = iter(reader)
1565
+ frames_fed = 0
1566
+ while True:
1567
  _check_cancellation(job_id)
1568
+ if max_frames is not None and frames_fed >= max_frames:
1569
+ break
1570
+
1571
+ try:
1572
+ frame = next(reader_iter)
1573
+ except StopIteration:
1574
  break
1575
+
1576
+ queue_in.put((frames_fed, frame))
1577
+ frames_fed += 1
1578
 
1579
  for _ in workers:
1580
+ try: queue_in.put(None, timeout=5.0)
1581
+ except Full: pass
1582
+
1583
  queue_in.join()
1584
 
1585
+ except Exception:
1586
+ logging.exception("Depth loop failed")
1587
+ for _ in workers:
1588
+ try: queue_in.put_nowait(None)
1589
+ except Full: pass
1590
+ raise
1591
+
1592
  finally:
1593
  reader.close()
1594
 
requirements.txt CHANGED
@@ -4,13 +4,8 @@ torch
4
  transformers @ git+https://github.com/huggingface/transformers.git@main
5
  opencv-python-headless
6
  python-multipart
7
- accelerate
8
  pillow
9
- scipy
10
  huggingface-hub
11
  ultralytics
12
- timm
13
- ffmpeg-python
14
  python-dotenv
15
  einops
16
-
 
4
  transformers @ git+https://github.com/huggingface/transformers.git@main
5
  opencv-python-headless
6
  python-multipart
 
7
  pillow
 
8
  huggingface-hub
9
  ultralytics
 
 
10
  python-dotenv
11
  einops
 
update_radar.py DELETED
@@ -1,189 +0,0 @@
1
- import re
2
-
3
- file_path = 'LaserPerception/LaserPerception.js'
4
-
5
- new_code = r'''// ========= Radar rendering (Tab 2) - Aligned with Tab 1 Scale/FOV =========
6
- function renderRadar() {
7
- const ctx = radarCanvas.getContext("2d");
8
- const rect = radarCanvas.getBoundingClientRect();
9
- const dpr = devicePixelRatio || 1;
10
- const targetW = Math.max(1, Math.floor(rect.width * dpr));
11
- const targetH = Math.max(1, Math.floor(rect.height * dpr));
12
- if (radarCanvas.width !== targetW || radarCanvas.height !== targetH) {
13
- radarCanvas.width = targetW;
14
- radarCanvas.height = targetH;
15
- }
16
- const w = radarCanvas.width, h = radarCanvas.height;
17
- ctx.clearRect(0, 0, w, h);
18
-
19
- // Background (Matches Tab 1)
20
- ctx.fillStyle = "#0a0f22";
21
- ctx.fillRect(0, 0, w, h);
22
-
23
- const cx = w * 0.5, cy = h * 0.5;
24
- const R = Math.min(w, h) * 0.45; // Match Tab 1 Radius factor
25
-
26
- // Rings (Matches Tab 1 style)
27
- ctx.strokeStyle = "rgba(34, 211, 238, 0.1)";
28
- ctx.lineWidth = 1;
29
- for (let i = 1; i <= 4; i++) {
30
- ctx.beginPath();
31
- ctx.arc(cx, cy, R * (i / 4), 0, Math.PI * 2);
32
- ctx.stroke();
33
- }
34
- // Cross
35
- ctx.beginPath();
36
- ctx.moveTo(cx - R, cy); ctx.lineTo(cx + R, cy);
37
- ctx.moveTo(cx, cy - R); ctx.lineTo(cx, cy + R);
38
- ctx.stroke();
39
-
40
- // Sweep Animation
41
- const t = now() / 1500; // Match Tab 1 speed (slower)
42
- const ang = (t * (Math.PI * 2)) % (Math.PI * 2);
43
-
44
- // Gradient Sweep
45
- const grad = ctx.createConicGradient(ang + Math.PI / 2, cx, cy);
46
- grad.addColorStop(0, "transparent");
47
- grad.addColorStop(0.1, "transparent");
48
- grad.addColorStop(0.8, "rgba(34, 211, 238, 0.0)");
49
- grad.addColorStop(1, "rgba(34, 211, 238, 0.15)");
50
- ctx.fillStyle = grad;
51
- ctx.beginPath();
52
- ctx.arc(cx, cy, R, 0, Math.PI * 2);
53
- ctx.fill();
54
-
55
- // Scan Line
56
- ctx.strokeStyle = "rgba(34, 211, 238, 0.6)";
57
- ctx.lineWidth = 1.5;
58
- ctx.beginPath();
59
- ctx.moveTo(cx, cy);
60
- ctx.lineTo(cx + Math.cos(ang) * R, cy + Math.sin(ang) * R);
61
- ctx.stroke();
62
-
63
- // Ownship (Center)
64
- ctx.fillStyle = "#22d3ee";
65
- ctx.beginPath();
66
- ctx.arc(cx, cy, 3, 0, Math.PI * 2);
67
- ctx.fill();
68
- ctx.strokeStyle = "rgba(34, 211, 238, 0.5)";
69
- ctx.lineWidth = 1;
70
- ctx.beginPath();
71
- ctx.arc(cx, cy, 6, 0, Math.PI * 2);
72
- ctx.stroke();
73
-
74
- // Render Tracks (Tab 2 Source, Tab 1 Logic)
75
- const tracks = state.tracker.tracks;
76
- tracks.forEach(tr => {
77
- // Range Logic (Matches Tab 1)
78
- const areaRange = rangeFromArea(tr);
79
- const displayRange = getTrackDisplayRange(tr);
80
-
81
- let dist = 3000;
82
- if (Number.isFinite(displayRange.range)) dist = displayRange.range;
83
- else dist = areaRange; // fallback
84
-
85
- // Scale: 0 -> 1500m (Matches Tab 1)
86
- const maxRangeM = 1500;
87
- const rPx = (clamp(dist, 0, maxRangeM) / maxRangeM) * R;
88
-
89
- // Bearing Logic (Matches Tab 1 FOV=60)
90
- // We need normalized X center (-0.5 to 0.5)
91
- // tracks store pixel coordinates on current frame scale
92
- const vw = videoEngage.videoWidth || state.frame.w || 1280;
93
- const bx = tr.bbox.x + tr.bbox.w * 0.5;
94
- const tx = (bx / vw) - 0.5; // -0.5 (left) to 0.5 (right)
95
-
96
- const fovRad = (60 * Math.PI) / 180;
97
- const angle = (-Math.PI / 2) + (tx * fovRad);
98
-
99
- const px = cx + Math.cos(angle) * rPx;
100
- const py = cy + Math.sin(angle) * rPx;
101
-
102
- // Styling based on State
103
- const isSelected = (state.tracker.selectedTrackId === tr.id);
104
- const killed = tr.killed;
105
-
106
- const col = killed ? "rgba(148,163,184,.65)" :
107
- (tr.state === "FIRE" ? "rgba(239,68,68,.9)" :
108
- (tr.state === "ASSESS" ? "rgba(245,158,11,.9)" :
109
- (isSelected ? "#f59e0b" : "rgba(34, 211, 238, 0.9)"))); // Cyan default
110
-
111
- if (isSelected) {
112
- ctx.shadowBlur = 10;
113
- ctx.shadowColor = col;
114
- } else {
115
- ctx.shadowBlur = 0;
116
- }
117
-
118
- ctx.fillStyle = col;
119
- ctx.beginPath();
120
- ctx.arc(px, py, 5, 0, Math.PI * 2);
121
- ctx.fill();
122
-
123
- // Label
124
- if (!killed && (isSelected || tracks.length < 5)) {
125
- ctx.fillStyle = "rgba(255,255,255,.75)";
126
- ctx.font = "11px " + getComputedStyle(document.body).fontFamily;
127
- ctx.fillText(tr.id, px + 8, py + 4);
128
- }
129
- });
130
-
131
- // Legend
132
- ctx.shadowBlur = 0;
133
- ctx.fillStyle = "rgba(255,255,255,.55)";
134
- ctx.font = "11px " + getComputedStyle(document.body).fontFamily;
135
- ctx.fillText("LIVE TRACKING: 60° FOV, 1500m SCALE", 10, 18);
136
- }'''
137
-
138
- with open(file_path, 'r') as f:
139
- content = f.read()
140
-
141
- # Pattern to find the existing renderRadar function
142
- # We look for the comment and function definition, and try to match until the end of the function
143
- # This is tricky with regex for nested braces, but we know the structure roughly.
144
- # Let's try to match from 'function renderRadar() {' to the end of the file or next function?
145
- # Actually, precise replacement is better.
146
- # We know it starts at line ~3350 and ends at ~3445.
147
- # Let's just find the start string and replace until a known end string or just rely on the structure.
148
-
149
- start_str = '// ========= Radar rendering (Tab 2) ========='
150
- end_str = 'function renderRadar() {'
151
- # We need to find the closing brace for this function.
152
- # Let's assume correct indentation of " }" at the start of a line.
153
-
154
- lines = content.split('\n')
155
- start_idx = -1
156
- end_idx = -1
157
-
158
- for i, line in enumerate(lines):
159
- if line.strip() == '// ========= Radar rendering (Tab 2) =========':
160
- start_idx = i
161
- break
162
-
163
- if start_idx != -1:
164
- # Find the matching closing brace.
165
- # We can count braces.
166
- brace_count = 0
167
- found_start = False
168
- for i in range(start_idx, len(lines)):
169
- line = lines[i]
170
- brace_count += line.count('{')
171
- brace_count -= line.count('}')
172
- if '{' in line:
173
- found_start = True
174
-
175
- if found_start and brace_count == 0:
176
- end_idx = i
177
- break
178
-
179
- if start_idx != -1 and end_idx != -1:
180
- print(f"Replacing lines {start_idx} to {end_idx}")
181
- new_lines = new_code.split('\n')
182
- lines[start_idx:end_idx+1] = new_lines
183
-
184
- with open(file_path, 'w') as f:
185
- f.write('\n'.join(lines))
186
- print("Successfully updated renderRadar")
187
- else:
188
- print("Could not find renderRadar block")
189
- exit(1)