Spaces:
Sleeping
Sleeping
refactor: clean up backend utilities and background job handler
Browse files- Simplify background.py exception handling (merge two except blocks)
- Remove unused imports and dead code in models.py, isr/utils.py
- Use get_pil_rgb() in frame_store adapter instead of manual BGR conversion
- Remove redundant dd benchmark variables in hardware_info.py
- Remove unused decode timing vars in profiler.py
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
- jobs/background.py +4 -32
- jobs/models.py +1 -1
- models/isr/utils.py +1 -7
- utils/frame_store.py +2 -3
- utils/hardware_info.py +3 -11
- utils/profiler.py +0 -9
jobs/background.py
CHANGED
|
@@ -17,8 +17,6 @@ async def process_video_async(job_id: str) -> None:
|
|
| 17 |
|
| 18 |
detection_path = None
|
| 19 |
depth_path = None
|
| 20 |
-
depth_error = None
|
| 21 |
-
partial_success = False
|
| 22 |
isr_task = None
|
| 23 |
|
| 24 |
# Create stream for live view
|
|
@@ -47,9 +45,7 @@ async def process_video_async(job_id: str) -> None:
|
|
| 47 |
_ttfs_t0=job.ttfs_t0,
|
| 48 |
)
|
| 49 |
else:
|
| 50 |
-
|
| 51 |
-
# Unified inference pipeline (handles depth internally if enabled)
|
| 52 |
-
result_pkg = await asyncio.to_thread(
|
| 53 |
run_inference,
|
| 54 |
job.input_video_path,
|
| 55 |
job.output_video_path,
|
|
@@ -63,20 +59,6 @@ async def process_video_async(job_id: str) -> None:
|
|
| 63 |
None, # first_frame_detections
|
| 64 |
job.ttfs_t0,
|
| 65 |
)
|
| 66 |
-
detection_path, detections_list = result_pkg
|
| 67 |
-
|
| 68 |
-
# If depth was requested, checking if output path exists for depth
|
| 69 |
-
# The unified pipeline creates 'output_video_path'.
|
| 70 |
-
# If depth enabled, it might have written depth there?
|
| 71 |
-
# Actually run_inference returns (video_path, detections).
|
| 72 |
-
# If depth was ON, the video at video_path *has* depth overlays.
|
| 73 |
-
# But the 'Depth Video' (heatmap only) is usually separate.
|
| 74 |
-
# Our Plan says: "Unified loop... Write Frame to Disk".
|
| 75 |
-
# If we want separate depth video, we need `run_inference` to handle it.
|
| 76 |
-
# Or just update 'depth_path' to be the same main video if it's merged?
|
| 77 |
-
# Let's keep it simple: If depth enabled, the main video IS the depth view (overlay).
|
| 78 |
-
# Or if we want separate `depth_output_path`, we need `run_inference` to handle it.
|
| 79 |
-
# Let's assume for now `run_inference` writes the main visualization path.
|
| 80 |
|
| 81 |
if job.depth_estimator_name:
|
| 82 |
# In unified mode, the main video contains the depth viz
|
|
@@ -98,13 +80,11 @@ async def process_video_async(job_id: str) -> None:
|
|
| 98 |
completed_at=datetime.utcnow(),
|
| 99 |
output_video_path=detection_path,
|
| 100 |
depth_output_path=depth_path,
|
| 101 |
-
partial_success=partial_success,
|
| 102 |
-
depth_error=depth_error,
|
| 103 |
)
|
| 104 |
|
| 105 |
-
except
|
| 106 |
-
|
| 107 |
-
if
|
| 108 |
logging.info("Job %s was cancelled", job_id)
|
| 109 |
storage.update(
|
| 110 |
job_id,
|
|
@@ -120,13 +100,5 @@ async def process_video_async(job_id: str) -> None:
|
|
| 120 |
completed_at=datetime.utcnow(),
|
| 121 |
error=str(exc),
|
| 122 |
)
|
| 123 |
-
except Exception as exc:
|
| 124 |
-
logging.exception("Background processing failed for job %s", job_id)
|
| 125 |
-
storage.update(
|
| 126 |
-
job_id,
|
| 127 |
-
status=JobStatus.FAILED,
|
| 128 |
-
completed_at=datetime.utcnow(),
|
| 129 |
-
error=str(exc),
|
| 130 |
-
)
|
| 131 |
finally:
|
| 132 |
remove_stream(job_id)
|
|
|
|
| 17 |
|
| 18 |
detection_path = None
|
| 19 |
depth_path = None
|
|
|
|
|
|
|
| 20 |
isr_task = None
|
| 21 |
|
| 22 |
# Create stream for live view
|
|
|
|
| 45 |
_ttfs_t0=job.ttfs_t0,
|
| 46 |
)
|
| 47 |
else:
|
| 48 |
+
detection_path, _ = await asyncio.to_thread(
|
|
|
|
|
|
|
| 49 |
run_inference,
|
| 50 |
job.input_video_path,
|
| 51 |
job.output_video_path,
|
|
|
|
| 59 |
None, # first_frame_detections
|
| 60 |
job.ttfs_t0,
|
| 61 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
if job.depth_estimator_name:
|
| 64 |
# In unified mode, the main video contains the depth viz
|
|
|
|
| 80 |
completed_at=datetime.utcnow(),
|
| 81 |
output_video_path=detection_path,
|
| 82 |
depth_output_path=depth_path,
|
|
|
|
|
|
|
| 83 |
)
|
| 84 |
|
| 85 |
+
except Exception as exc:
|
| 86 |
+
is_cancel = isinstance(exc, RuntimeError) and "cancelled" in str(exc).lower()
|
| 87 |
+
if is_cancel:
|
| 88 |
logging.info("Job %s was cancelled", job_id)
|
| 89 |
storage.update(
|
| 90 |
job_id,
|
|
|
|
| 100 |
completed_at=datetime.utcnow(),
|
| 101 |
error=str(exc),
|
| 102 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
finally:
|
| 104 |
remove_stream(job_id)
|
jobs/models.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from dataclasses import dataclass, field
|
| 2 |
from datetime import datetime
|
| 3 |
from enum import Enum
|
| 4 |
-
from typing import
|
| 5 |
|
| 6 |
|
| 7 |
class JobStatus(str, Enum):
|
|
|
|
| 1 |
from dataclasses import dataclass, field
|
| 2 |
from datetime import datetime
|
| 3 |
from enum import Enum
|
| 4 |
+
from typing import List, Optional
|
| 5 |
|
| 6 |
|
| 7 |
class JobStatus(str, Enum):
|
models/isr/utils.py
CHANGED
|
@@ -3,7 +3,6 @@
|
|
| 3 |
import base64
|
| 4 |
import json
|
| 5 |
import logging
|
| 6 |
-
import os
|
| 7 |
|
| 8 |
import cv2
|
| 9 |
import numpy as np
|
|
@@ -11,11 +10,6 @@ import numpy as np
|
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
| 13 |
|
| 14 |
-
def get_api_key(env_var: str) -> str | None:
|
| 15 |
-
"""Load an API key from environment."""
|
| 16 |
-
return os.environ.get(env_var)
|
| 17 |
-
|
| 18 |
-
|
| 19 |
def crop_and_encode(frame: np.ndarray, bbox: list, max_dim: int = 256, quality: int = 70) -> str | None:
|
| 20 |
"""Crop detection from frame and return base64 JPEG string.
|
| 21 |
|
|
@@ -83,6 +77,6 @@ def parse_llm_json(raw: str) -> dict | list | None:
|
|
| 83 |
try:
|
| 84 |
cleaned = strip_json_fences(raw)
|
| 85 |
return json.loads(cleaned)
|
| 86 |
-
except (json.JSONDecodeError,
|
| 87 |
logger.warning("Failed to parse LLM JSON response")
|
| 88 |
return None
|
|
|
|
| 3 |
import base64
|
| 4 |
import json
|
| 5 |
import logging
|
|
|
|
| 6 |
|
| 7 |
import cv2
|
| 8 |
import numpy as np
|
|
|
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
def crop_and_encode(frame: np.ndarray, bbox: list, max_dim: int = 256, quality: int = 70) -> str | None:
|
| 14 |
"""Crop detection from frame and return base64 JPEG string.
|
| 15 |
|
|
|
|
| 77 |
try:
|
| 78 |
cleaned = strip_json_fences(raw)
|
| 79 |
return json.loads(cleaned)
|
| 80 |
+
except (json.JSONDecodeError, ValueError):
|
| 81 |
logger.warning("Failed to parse LLM JSON response")
|
| 82 |
return None
|
utils/frame_store.py
CHANGED
|
@@ -140,10 +140,9 @@ class SAM2FrameAdapter:
|
|
| 140 |
return self.images[idx]
|
| 141 |
|
| 142 |
# TRANSFORM PARITY: Must match SAM2's _load_img_as_tensor exactly.
|
| 143 |
-
# SAM2 does: PIL Image
|
| 144 |
# PIL.resize default = BICUBIC. We must use PIL resize, NOT cv2.resize.
|
| 145 |
-
|
| 146 |
-
pil_img = Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
|
| 147 |
pil_resized = pil_img.resize(
|
| 148 |
(self._image_size, self._image_size)
|
| 149 |
) # BICUBIC default
|
|
|
|
| 140 |
return self.images[idx]
|
| 141 |
|
| 142 |
# TRANSFORM PARITY: Must match SAM2's _load_img_as_tensor exactly.
|
| 143 |
+
# SAM2 does: PIL Image -> .convert("RGB") -> .resize((size, size)) -> /255 -> permute -> normalize
|
| 144 |
# PIL.resize default = BICUBIC. We must use PIL resize, NOT cv2.resize.
|
| 145 |
+
pil_img = self._store.get_pil_rgb(idx)
|
|
|
|
| 146 |
pil_resized = pil_img.resize(
|
| 147 |
(self._image_size, self._image_size)
|
| 148 |
) # BICUBIC default
|
utils/hardware_info.py
CHANGED
|
@@ -228,8 +228,6 @@ def get_gpu_info() -> List[GPUInfo]:
|
|
| 228 |
nv = _nvidia_smi_query("driver_version")
|
| 229 |
if nv:
|
| 230 |
driver_version = nv.get("driver_version")
|
| 231 |
-
# nvidia-smi reports the max supported CUDA runtime
|
| 232 |
-
nv2 = _run_cmd(["nvidia-smi", "--query-gpu=driver_version", "--format=csv,noheader"])
|
| 233 |
smi_out = _run_cmd(["nvidia-smi"])
|
| 234 |
if smi_out:
|
| 235 |
m = re.search(r"CUDA Version:\s+([\d.]+)", smi_out)
|
|
@@ -338,20 +336,14 @@ def get_storage_info() -> StorageInfo:
|
|
| 338 |
info.storage_type = "HDD" if parts[1] == "1" else "SSD"
|
| 339 |
break
|
| 340 |
|
| 341 |
-
#
|
| 342 |
-
|
| 343 |
-
["dd", "if=/dev/zero", "of=/dev/null", "bs=1M", "count=256"],
|
| 344 |
-
timeout=15,
|
| 345 |
-
)
|
| 346 |
-
# dd prints throughput to stderr, but _run_cmd only captures stdout
|
| 347 |
-
# Try a different approach
|
| 348 |
try:
|
| 349 |
result = subprocess.run(
|
| 350 |
["dd", "if=/dev/zero", "of=/dev/null", "bs=1M", "count=256"],
|
| 351 |
capture_output=True, text=True, timeout=15,
|
| 352 |
)
|
| 353 |
-
|
| 354 |
-
m = re.search(r"([\d.]+)\s*(GB|MB)/s", stderr)
|
| 355 |
if m:
|
| 356 |
speed = float(m.group(1))
|
| 357 |
if m.group(2) == "GB":
|
|
|
|
| 228 |
nv = _nvidia_smi_query("driver_version")
|
| 229 |
if nv:
|
| 230 |
driver_version = nv.get("driver_version")
|
|
|
|
|
|
|
| 231 |
smi_out = _run_cmd(["nvidia-smi"])
|
| 232 |
if smi_out:
|
| 233 |
m = re.search(r"CUDA Version:\s+([\d.]+)", smi_out)
|
|
|
|
| 336 |
info.storage_type = "HDD" if parts[1] == "1" else "SSD"
|
| 337 |
break
|
| 338 |
|
| 339 |
+
# Estimate sequential read speed (memory throughput proxy — real disk
|
| 340 |
+
# benchmarks require block device access unavailable in containers)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
try:
|
| 342 |
result = subprocess.run(
|
| 343 |
["dd", "if=/dev/zero", "of=/dev/null", "bs=1M", "count=256"],
|
| 344 |
capture_output=True, text=True, timeout=15,
|
| 345 |
)
|
| 346 |
+
m = re.search(r"([\d.]+)\s*(GB|MB)/s", result.stderr)
|
|
|
|
| 347 |
if m:
|
| 348 |
speed = float(m.group(1))
|
| 349 |
if m.group(2) == "GB":
|
utils/profiler.py
CHANGED
|
@@ -250,12 +250,6 @@ def run_profiled_detection(
|
|
| 250 |
if frame_idx >= max_frames:
|
| 251 |
break
|
| 252 |
|
| 253 |
-
# Decode timing
|
| 254 |
-
t_decode_start = time.perf_counter()
|
| 255 |
-
# frame is already decoded by VideoReader, so decode = iteration time
|
| 256 |
-
# We measure it before predict for consistency
|
| 257 |
-
decode_ms = 0.0 # Measured below
|
| 258 |
-
|
| 259 |
if frame_idx < warmup_frames:
|
| 260 |
# Warmup: run prediction but don't record
|
| 261 |
if is_decomposable:
|
|
@@ -265,9 +259,6 @@ def run_profiled_detection(
|
|
| 265 |
frame_idx += 1
|
| 266 |
continue
|
| 267 |
|
| 268 |
-
# Time the decode (approximated as read time for next frame)
|
| 269 |
-
t_before = time.perf_counter()
|
| 270 |
-
|
| 271 |
# Profile prediction
|
| 272 |
if is_decomposable:
|
| 273 |
timing = _profile_decomposed(detector, frame, queries)
|
|
|
|
| 250 |
if frame_idx >= max_frames:
|
| 251 |
break
|
| 252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
if frame_idx < warmup_frames:
|
| 254 |
# Warmup: run prediction but don't record
|
| 255 |
if is_decomposable:
|
|
|
|
| 259 |
frame_idx += 1
|
| 260 |
continue
|
| 261 |
|
|
|
|
|
|
|
|
|
|
| 262 |
# Profile prediction
|
| 263 |
if is_decomposable:
|
| 264 |
timing = _profile_decomposed(detector, frame, queries)
|