Zhen Ye Claude Opus 4.6 (1M context) committed on
Commit
157bd4f
·
1 Parent(s): 3223cd2

feat(inspection): add frame extraction and cropping module

Browse files

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

inspection/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """Object Deep-Inspection backend module.
2
+
3
+ Provides on-demand analysis of individual detected objects:
4
+ frame extraction, mask retrieval, depth analysis, attention maps,
5
+ super-resolution, and 3D point clouds.
6
+ """
inspection/frames.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Frame extraction and cropping from input videos.
2
+
3
+ All operations use on-demand cv2.VideoCapture seeking — no frames are
4
+ pre-extracted or stored in memory.
5
+ """
6
+
7
+ import logging
8
+ from typing import List, Optional, Tuple
9
+
10
+ import cv2
11
+ import numpy as np
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def extract_frame(video_path: str, frame_idx: int) -> np.ndarray:
    """Extract a single frame from a video by index.

    The capture is opened, seeked to ``frame_idx``, read once, and always
    released before returning or raising.

    Args:
        video_path: Path to the video file.
        frame_idx: Zero-based frame index.

    Returns:
        HxWx3 BGR uint8 numpy array.

    Raises:
        FileNotFoundError: If the video cannot be opened (this is what the
            capture reports for a missing path as well as for a file it
            cannot decode).
        ValueError: If frame_idx is negative, is at or beyond the reported
            frame count, or the frame cannot be read.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # The open check lives inside the try so the capture is released on
        # this failure path too.
        if not cap.isOpened():
            raise FileNotFoundError(f"Cannot open video: {video_path}")

        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Some containers/backends report a frame count of 0 (or less) even
        # though frames are readable. Only enforce the upper bound when the
        # count is usable; otherwise let the read below decide.
        count_known = total > 0
        if frame_idx < 0 or (count_known and frame_idx >= total):
            raise ValueError(
                f"Frame index {frame_idx} out of range [0, {total})"
            )

        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        success, frame = cap.read()
        if not success or frame is None:
            raise ValueError(f"Failed to read frame {frame_idx}")
        return frame
    finally:
        cap.release()
47
+
48
+
49
def get_video_info(video_path: str) -> dict:
    """Read basic metadata from a video file.

    Args:
        video_path: Path to the video file.

    Returns:
        Dict with keys ``total_frames``, ``fps``, ``width`` and ``height``.
        ``fps`` falls back to 30.0 when the capture reports a falsy value.

    Raises:
        FileNotFoundError: If the video cannot be opened.
    """
    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        raise FileNotFoundError(f"Cannot open video: {video_path}")

    try:
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = capture.get(cv2.CAP_PROP_FPS)
        if not fps:
            # A reported rate of 0 means "unknown"; use a default instead.
            fps = 30.0
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        capture.release()

    return {
        "total_frames": frame_count,
        "fps": fps,
        "width": width,
        "height": height,
    }
63
+
64
+
65
def crop_frame(
    frame: np.ndarray,
    bbox: List[int],
    padding: float = 0.15,
) -> np.ndarray:
    """Crop a frame to a bounding box with optional padding.

    The padded box is clamped to the frame on every side. A box that lies
    entirely outside the frame (or has no area) yields an empty array
    rather than an unrelated region of the frame.

    Args:
        frame: HxWx3 BGR numpy array.
        bbox: [x1, y1, x2, y2] in pixel coordinates. Float coordinates are
            rounded to the nearest pixel.
        padding: Fractional padding around the bbox (0.15 = 15% each side).

    Returns:
        Cropped HxWx3 BGR numpy array (a copy, independent of ``frame``).
    """
    h, w = frame.shape[:2]
    # Detectors often emit float coordinates; slice bounds must be integers.
    x1, y1, x2, y2 = (int(round(v)) for v in bbox)

    pad_x = int((x2 - x1) * padding)
    pad_y = int((y2 - y1) * padding)

    # Clamp both ends into [0, w] / [0, h]. Without the lower clamp on the
    # far edge, a box left of / above the frame produces a negative slice
    # stop, which NumPy interprets as "count from the end".
    cx1 = min(w, max(0, x1 - pad_x))
    cy1 = min(h, max(0, y1 - pad_y))
    cx2 = max(cx1, min(w, x2 + pad_x))
    cy2 = max(cy1, min(h, y2 + pad_y))

    return frame[cy1:cy2, cx1:cx2].copy()
94
+
95
+
96
def frame_to_jpeg(frame: np.ndarray, quality: int = 90) -> bytes:
    """Encode a BGR frame as JPEG bytes.

    Args:
        frame: HxWx3 BGR numpy array.
        quality: JPEG quality (1-100).

    Returns:
        JPEG bytes.

    Raises:
        RuntimeError: If the encoder reports failure.
    """
    ok, encoded = cv2.imencode(
        ".jpg", frame, [int(cv2.IMWRITE_JPEG_QUALITY), quality]
    )
    if ok:
        return encoded.tobytes()
    raise RuntimeError("Failed to encode frame as JPEG")
tests/__init__.py ADDED
File without changes
tests/test_inspection_frames.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+
5
def test_extract_frame_returns_bgr_array(tmp_path):
    """extract_frame should hand back an HxWx3 uint8 numpy array."""
    from inspection.frames import extract_frame

    import cv2

    # Write a tiny clip: 10 solid-colour frames at 64x48.
    video_path = str(tmp_path / "test.mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(video_path, fourcc, 30, (64, 48))
    for level in range(10):
        writer.write(np.full((48, 64, 3), level * 25, dtype=np.uint8))
    writer.release()

    result = extract_frame(video_path, 0)
    assert isinstance(result, np.ndarray)
    assert result.shape == (48, 64, 3)
    assert result.dtype == np.uint8
24
+
25
+
26
def test_extract_frame_different_indices(tmp_path):
    """Seeking to two distinct indices should yield distinct pixel data."""
    from inspection.frames import extract_frame
    import cv2

    # Each frame gets its own grey level so frames are distinguishable.
    video_path = str(tmp_path / "test.mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(video_path, fourcc, 30, (64, 48))
    for level in range(10):
        writer.write(np.full((48, 64, 3), level * 25, dtype=np.uint8))
    writer.release()

    first = extract_frame(video_path, 0)
    sixth = extract_frame(video_path, 5)
    assert not np.array_equal(first, sixth)
43
+
44
+
45
def test_extract_frame_out_of_range(tmp_path):
    """An index past the end of the clip should raise ValueError."""
    from inspection.frames import extract_frame
    import cv2

    # 10 black frames; the content is irrelevant for this check.
    video_path = str(tmp_path / "test.mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(video_path, fourcc, 30, (64, 48))
    for _ in range(10):
        writer.write(np.zeros((48, 64, 3), dtype=np.uint8))
    writer.release()

    with pytest.raises(ValueError, match="out of range"):
        extract_frame(video_path, 999)
60
+
61
+
62
def test_crop_frame_to_bbox():
    """With zero padding, crop_frame should return exactly the bbox region."""
    from inspection.frames import crop_frame

    # Black canvas with a white rectangle that coincides with the bbox.
    canvas = np.zeros((200, 300, 3), dtype=np.uint8)
    canvas[50:100, 80:180] = 255

    box = [80, 50, 180, 100]  # x1, y1, x2, y2
    region = crop_frame(canvas, box, padding=0.0)

    assert region.shape == (50, 100, 3)
    assert np.all(region == 255)
74
+
75
+
76
def test_crop_frame_with_padding():
    """Padding should expand the crop region, clamped to frame bounds."""
    from inspection.frames import crop_frame

    frame = np.zeros((200, 300, 3), dtype=np.uint8)
    bbox = [100, 50, 200, 150]  # 100x100 box
    crop = crop_frame(frame, bbox, padding=0.5)
    # 50% padding on a 100x100 box = 50px each side.
    # Expected region: x=[50, 250], y=[0, 200] (y is clamped at the bottom
    # edge), i.e. 200 rows by 200 columns. Pin the exact shape so an
    # over- or under-sized crop is caught, not just "bigger than the box".
    assert crop.shape == (200, 200, 3)
87
+
88
+
89
def test_crop_frame_clamped_to_bounds():
    """Padding that exceeds frame bounds should be clamped."""
    from inspection.frames import crop_frame

    frame = np.zeros((100, 100, 3), dtype=np.uint8)
    bbox = [0, 0, 100, 100]
    crop = crop_frame(frame, bbox, padding=1.0)
    # The bbox already covers the whole frame, so clamping must return the
    # full frame. An exact match also rules out an empty crop, which a
    # "<= 100" check would accept.
    assert crop.shape == (100, 100, 3)