harmesh95 committed (verified)
Commit a090915 · Parent(s): 453aad4

Upload 24 files

backend/__init__.py ADDED
File without changes
backend/config.py ADDED
@@ -0,0 +1,16 @@
+ import os
+
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+ DETECT_MODEL = os.path.join(BASE_DIR, "models", "yolov8n.pt")
+ POSE_MODEL = os.path.join(BASE_DIR, "models", "yolov8n-pose.pt")
+
+
+ # Thresholds and params (overridable via environment variables)
+ CONF_THRESHOLD = float(os.getenv("CONF_THRESHOLD", 0.3))
+ INACTIVE_TIMEOUT = int(os.getenv("INACTIVE_TIMEOUT", 30))
+ FRAME_SKIP = int(os.getenv("FRAME_SKIP", 2))
+ INPUT_SIZE = int(os.getenv("INPUT_SIZE", 640))
+
+ # Output paths (BASE_DIR is already defined above)
+ OUTPUT_DIR = os.getenv("OUTPUT_DIR", os.path.join(BASE_DIR, "output"))
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
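A quick way to sanity-check the environment overrides above is to set a variable before the module is first imported; a minimal sketch (the override value here is illustrative):

import os

os.environ["CONF_THRESHOLD"] = "0.5"  # must be set before the first import of backend.config

from backend import config

print(config.CONF_THRESHOLD)  # 0.5
print(config.OUTPUT_DIR)      # <backend>/output unless OUTPUT_DIR is set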
backend/data_extraction/__init__.py ADDED
File without changes
backend/data_extraction/interaction_analyzer.py ADDED
@@ -0,0 +1,118 @@
+ import numpy as np
+ from backend.utils.motion_utils import (
+     calc_avg_speed,
+     calc_motion_intensity,
+     calc_sudden_movements,
+ )
+ from backend.utils.interaction_utils import (
+     get_box_center,
+     euclidean_distance,
+     relative_distance,
+     relative_keypoints,
+ )
+
+
+ class InteractionAnalyzer:
+     """
+     Analyze human motion and interactions between people based on poses and bounding boxes.
+     """
+
+     def __init__(self):
+         # You can later add thresholds or state here if needed
+         pass
+
+     def calculate_motion_features(
+         self,
+         prev_poses: list[list[list[float]]],
+         current_poses: list[list[list[float]]],
+     ) -> dict:
+         """
+         Calculate motion features between consecutive frames.
+
+         Args:
+             prev_poses: List of keypoints for all people in previous frame
+             current_poses: List of keypoints for all people in current frame
+
+         Returns:
+             dict: {
+                 "average_speed": float,
+                 "motion_intensity": float,
+                 "sudden_movements": int
+             }
+         """
+         return {
+             "average_speed": calc_avg_speed(prev_poses, current_poses),
+             "motion_intensity": calc_motion_intensity(prev_poses, current_poses),
+             "sudden_movements": calc_sudden_movements(prev_poses, current_poses),
+         }
+
+     def calculate_interactions(
+         self,
+         person_boxes: list[list[float]],
+         current_poses: list[list[list[float]]],
+         tracked_persons: dict,
+     ) -> list[dict]:
+         """
+         Calculate pairwise interactions between people based on bounding boxes and keypoints.
+
+         Args:
+             person_boxes: List of bounding boxes [[x1, y1, x2, y2], ...] for each person
+             current_poses: List of keypoints for each person
+             tracked_persons: Dict mapping person_id -> last tracked box
+
+         Returns:
+             List of dictionaries describing interactions between people
+         """
+         interactions = []
+
+         if len(person_boxes) < 2:
+             return interactions
+
+         for i in range(len(person_boxes)):
+             for j in range(i + 1, len(person_boxes)):
+                 try:
+                     # Ensure poses exist for both people
+                     if i >= len(current_poses) or j >= len(current_poses):
+                         continue
+
+                     box1, box2 = person_boxes[i], person_boxes[j]
+                     pose1, pose2 = current_poses[i], current_poses[j]
+
+                     # Find person IDs by matching boxes against the tracker state
+                     id1, id2 = None, None
+                     for pid, tracked_box in tracked_persons.items():
+                         if np.array_equal(box1, tracked_box):
+                             id1 = pid
+                         if np.array_equal(box2, tracked_box):
+                             id2 = pid
+
+                     if id1 is None or id2 is None:
+                         continue
+
+                     # Build interaction dictionary using utils
+                     interaction = {
+                         "person1_idx": i,
+                         "person2_idx": j,
+                         "person1_id": id1,
+                         "person2_id": id2,
+                         "box1": box1,
+                         "box2": box2,
+                         "center1": get_box_center(box1),
+                         "center2": get_box_center(box2),
+                         "distance": euclidean_distance(
+                             get_box_center(box1), get_box_center(box2)
+                         ),
+                         "relative_distance": relative_distance(box1, box2),
+                         "keypoints": {
+                             "person1": pose1,
+                             "person2": pose2,
+                             "relative": relative_keypoints(pose1, pose2),
+                         },
+                     }
+                     interactions.append(interaction)
+
+                 except Exception as e:
+                     print(f"Skipping interaction {i}-{j}: {e}")
+                     continue
+
+         return interactions
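For reference, a toy invocation under the shapes this class assumes (each pose is a 17x3 list of [x, y, conf]; the boxes and tracker state below are made up):

from backend.data_extraction.interaction_analyzer import InteractionAnalyzer

analyzer = InteractionAnalyzer()
boxes = [[0, 0, 100, 200], [150, 0, 250, 200]]
poses = [[[50.0, 100.0, 0.9]] * 17, [[200.0, 100.0, 0.9]] * 17]
tracked = {0: boxes[0], 1: boxes[1]}  # person_id -> box, as produced by PersonTracker

interactions = analyzer.calculate_interactions(boxes, poses, tracked)
print(interactions[0]["distance"])  # 150.0, between the two box centers
print(analyzer.calculate_motion_features(poses, poses))  # all zeros: identical frames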
backend/data_extraction/person_tracker.py ADDED
@@ -0,0 +1,80 @@
+ import numpy as np
+ from backend.utils.iou_utils import calculate_iou
+ from backend.utils.id_utils import get_new_id
+
+
+ class PersonTracker:
+     """
+     Tracks people across frames by assigning consistent IDs to bounding boxes.
+     """
+
+     def __init__(self, inactive_timeout=30):
+         self.person_id_counter = 0
+         self.tracked_persons = {}  # {id: box}
+         self.inactive_persons = {}  # reserved for future re-identification
+         self.inactive_timeout = inactive_timeout
+
+     def assign_person_ids(self, current_boxes):
+         """
+         Assign IDs to current frame boxes based on IoU with previous frame.
+
+         Args:
+             current_boxes (list of list): [[x1, y1, x2, y2], ...]
+
+         Returns:
+             dict: {person_id: box} for current frame
+         """
+         new_tracked = {}
+         used_ids = set()
+
+         if not self.tracked_persons:
+             # First frame - assign new IDs to all boxes
+             for box in current_boxes:
+                 person_id, self.person_id_counter = get_new_id(self.person_id_counter)
+                 new_tracked[person_id] = box
+         else:
+             # Convert boxes to numpy arrays
+             current_boxes_np = np.array(current_boxes)
+             prev_boxes_np = np.array(list(self.tracked_persons.values()))
+
+             if len(current_boxes_np) > 0 and len(prev_boxes_np) > 0:
+                 # Compute IoU matrix
+                 iou_matrix = np.zeros((len(current_boxes_np), len(prev_boxes_np)))
+                 for i, curr_box in enumerate(current_boxes_np):
+                     for j, prev_box in enumerate(prev_boxes_np):
+                         iou_matrix[i, j] = calculate_iou(curr_box, prev_box)
+
+                 # Match boxes based on IoU > 0.3
+                 matched_pairs = []
+                 for i in range(len(current_boxes_np)):
+                     max_j = np.argmax(iou_matrix[i])
+                     if iou_matrix[i, max_j] > 0.3:
+                         matched_pairs.append((i, max_j))
+
+                 # Assign matched IDs; each previous ID may only be claimed once
+                 prev_ids = list(self.tracked_persons.keys())
+                 matched_rows = set()
+                 for i, j in matched_pairs:
+                     person_id = prev_ids[j]
+                     if person_id in used_ids:
+                         continue  # previous box already claimed by an earlier match
+                     new_tracked[person_id] = current_boxes_np[i]
+                     used_ids.add(person_id)
+                     matched_rows.add(i)
+
+                 # Assign new IDs to unmatched boxes
+                 for i, box in enumerate(current_boxes_np):
+                     if i not in matched_rows:
+                         person_id, self.person_id_counter = get_new_id(
+                             self.person_id_counter
+                         )
+                         new_tracked[person_id] = box
+
+         self.tracked_persons = new_tracked
+         return new_tracked
+
+     def reset(self):
+         """Reset the tracker for a new video."""
+         self.person_id_counter = 0
+         self.tracked_persons = {}
+         self.inactive_persons = {}
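A minimal two-frame walk-through (boxes are made up) showing that a slightly shifted box keeps its ID while a brand-new box gets a fresh one:

from backend.data_extraction.person_tracker import PersonTracker

tracker = PersonTracker()
frame1 = tracker.assign_person_ids([[10, 10, 50, 90]])
print(frame1)  # {0: [10, 10, 50, 90]}

# Same person moved slightly (IoU ~0.88 > 0.3), plus a newcomer with zero overlap
frame2 = tracker.assign_person_ids([[12, 11, 52, 91], [200, 10, 240, 90]])
print(sorted(frame2.keys()))  # [0, 1] - ID 0 is kept, the new box becomes ID 1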
backend/feature_extraction/__init__.py ADDED
File without changes
backend/feature_extraction/extractor.py ADDED
@@ -0,0 +1,179 @@
+ import torch
+ from backend.config import DETECT_MODEL, POSE_MODEL, CONF_THRESHOLD
+ from backend.utils.gpu import GPUConfigurator
+ from backend.preprocessing.preprocessor import FramePreprocessor
+ from backend.data_extraction.interaction_analyzer import InteractionAnalyzer
+ from backend.data_extraction.person_tracker import PersonTracker
+ from backend.utils.visualizer import Visualizer
+ import numpy as np
+ from ultralytics import YOLO
+
+
+ class VideoFeatureExtractor:
+     def __init__(self):
+         self.gpu_config = GPUConfigurator()
+         self.device = self.gpu_config.device
+
+         self.detection_model = YOLO(DETECT_MODEL).to(self.device)
+         self.pose_model = YOLO(POSE_MODEL).to(self.device)
+
+         self.preprocessor = FramePreprocessor()
+         self.interaction_analyzer = InteractionAnalyzer()
+         self.person_tracker = PersonTracker()
+         self.visualizer = Visualizer()
+
+         self.conf_threshold = CONF_THRESHOLD
+         self.prev_poses = None
+
+     def extract_features(self, frame, frame_idx):
+         """Extract features from a frame."""
+         try:
+             processed_frame, scale_info = self.preprocessor.preprocess_frame(frame)
+             if processed_frame is None:
+                 return None, frame
+
+             frame_tensor = (
+                 torch.from_numpy(processed_frame)
+                 .permute(2, 0, 1)
+                 .unsqueeze(0)
+                 .to(self.device)
+             )
+
+             if frame_idx % 5 == 0:
+                 torch.cuda.empty_cache()
+
+             with (
+                 torch.no_grad(),
+                 torch.amp.autocast(device_type="cuda", dtype=torch.float16),
+             ):
+                 det_results = self.detection_model(
+                     frame_tensor, conf=self.conf_threshold, verbose=False
+                 )
+                 pose_results = (
+                     self.pose_model(
+                         frame_tensor, conf=self.conf_threshold, verbose=False
+                     )
+                     if len(det_results[0].boxes) > 0
+                     else []
+                 )
+
+             frame_data = {
+                 "frame_index": frame_idx,
+                 "timestamp": frame_idx / 30,  # assumes a 30 fps source
+                 "persons": [],
+                 "objects": [],
+                 "interactions": [],
+                 "resized_width": scale_info.get("resized_size", (0, 0))[1],
+                 "resized_height": scale_info.get("resized_size", (0, 0))[0],
+             }
+
+             # Process detections
+             person_boxes = []
+             for result in det_results:
+                 for box in result.boxes:
+                     try:
+                         cls = result.names[int(box.cls[0])]
+                         box_coords = box.xyxy[0].cpu().numpy().tolist()
+                         if cls == "person":
+                             person_boxes.append(box_coords)
+                         else:
+                             frame_data["objects"].append(
+                                 {
+                                     "class": cls,
+                                     "confidence": float(box.conf[0]),
+                                     "box": box_coords,
+                                 }
+                             )
+                     except Exception as e:
+                         print(f"Detection processing error: {e}")
+                         continue
+
+             # Track persons
+             tracked_persons = self.person_tracker.assign_person_ids(person_boxes)
+
+             # Process poses
+             current_poses = []
+             if pose_results:
+                 for result in pose_results:
+                     if result.keypoints:
+                         for kpts in result.keypoints:
+                             try:
+                                 pose_data = kpts.data[0].cpu().numpy().tolist()
+                                 current_poses.append(pose_data)
+                             except Exception as e:
+                                 print(f"Pose processing error: {e}")
+                                 continue
+
+             # Match persons to poses
+             frame_data["persons"] = []
+             for i, box in enumerate(person_boxes):
+                 try:
+                     pose = current_poses[i] if i < len(current_poses) else None
+                     if pose is None:
+                         continue
+
+                     # Find the person ID for this box
+                     person_id = None
+                     for pid, tracked_box in tracked_persons.items():
+                         if np.array_equal(box, tracked_box):
+                             person_id = pid
+                             break
+
+                     if person_id is None:
+                         continue
+
+                     frame_data["persons"].append(
+                         {
+                             "person_idx": i,
+                             "person_id": person_id,
+                             "box": box,
+                             "center": [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2],
+                             "keypoints": pose,
+                         }
+                     )
+
+                 except Exception as e:
+                     print(f"Skipping person {i} due to error: {e}")
+                     continue
+
+             # Calculate motion features
+             motion_features = {
+                 "average_speed": 0,
+                 "motion_intensity": 0,
+                 "sudden_movements": 0,
+             }
+
+             if self.prev_poses and current_poses:
+                 try:
+                     motion_features = (
+                         self.interaction_analyzer.calculate_motion_features(
+                             self.prev_poses, current_poses
+                         )
+                     )
+                 except Exception as e:
+                     print(f"Motion calculation error: {e}")
+
+             frame_data["motion_features"] = motion_features
+             self.prev_poses = current_poses
+
+             # Create interactions
+             frame_data["interactions"] = (
+                 self.interaction_analyzer.calculate_interactions(
+                     person_boxes, current_poses, tracked_persons
+                 )
+             )
+
+             annotated_frame = self.visualizer.draw_detections(
+                 frame, det_results, pose_results, scale_info, tracked_persons
+             )
+
+             return frame_data, annotated_frame
+
+         except Exception as e:
+             print(f"Frame {frame_idx} failed completely: {e}")
+             return None, frame
+
+     def reset(self):
+         """Reset state for a new video."""
+         self.person_tracker.reset()
+         self.prev_poses = None
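For a quick smoke test of the extractor (this assumes the LFS model weights below have been pulled and that a CUDA device is available for the autocast path; the clip name is hypothetical):

import cv2
from backend.feature_extraction.extractor import VideoFeatureExtractor

extractor = VideoFeatureExtractor()
cap = cv2.VideoCapture("sample.mp4")  # hypothetical input file
ret, frame = cap.read()
if ret:
    frame_data, annotated = extractor.extract_features(frame, frame_idx=0)
    if frame_data is not None:
        print(f"persons: {len(frame_data['persons'])}, "
              f"objects: {len(frame_data['objects'])}, "
              f"interactions: {len(frame_data['interactions'])}")
cap.release()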
backend/models/yolov8n-pose.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f80660bc2f97d664d86fc9f50fd5903af392fe332c0d603fa0dd6c78bf8844c
+ size 6828990
backend/models/yolov8n.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:31e20dde3def09e2cf938c7be6fe23d9150bbbe503982af13345706515f2ef95
+ size 6534387
backend/preprocessing/__init__.py ADDED
File without changes
backend/preprocessing/preprocessor.py ADDED
@@ -0,0 +1,94 @@
+ import cv2
+ import numpy as np
+
+
+ class FramePreprocessor:
+     def __init__(self, input_size=640):
+         self.input_size = input_size
+
+     def set_resolution_config(self, frame_width, frame_height):
+         """Set appropriate configuration based on video resolution"""
+         max_dim = max(frame_width, frame_height)
+
+         # Adjust configuration based on resolution
+         if max_dim > 2560:  # 4K
+             frame_skip = 2
+             batch_size = 1
+         elif max_dim > 1920:  # 2K
+             frame_skip = 2
+             batch_size = 1
+         elif max_dim > 1280:  # Full HD
+             frame_skip = 1
+             batch_size = 2
+         else:  # HD or lower
+             frame_skip = 1
+             batch_size = 4
+
+         return batch_size, frame_skip
+
+     def preprocess_frame(self, frame):
+         """Preprocess frame while maintaining aspect ratio and handling high-res inputs"""
+         try:
+             # Convert BGR to RGB
+             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+             original_h, original_w = frame_rgb.shape[:2]
+
+             # Calculate target size maintaining aspect ratio
+             scale = self.input_size / max(original_w, original_h)
+             target_w = int(original_w * scale)
+             target_h = int(original_h * scale)
+
+             # Resize image
+             resized = cv2.resize(
+                 frame_rgb, (target_w, target_h), interpolation=cv2.INTER_AREA
+             )
+
+             # Create square canvas
+             canvas = np.zeros((self.input_size, self.input_size, 3), dtype=np.uint8)
+
+             # Calculate padding
+             pad_h = (self.input_size - target_h) // 2
+             pad_w = (self.input_size - target_w) // 2
+
+             # Place resized image on canvas
+             canvas[pad_h : pad_h + target_h, pad_w : pad_w + target_w] = resized
+
+             # Normalize
+             normalized = canvas.astype(np.float32) / 255.0
+
+             # Store scaling info
+             scale_info = {
+                 "scale": scale,
+                 "pad_w": pad_w,
+                 "pad_h": pad_h,
+                 "original_size": (original_h, original_w),
+                 "resized_size": (target_h, target_w),
+             }
+
+             return normalized, scale_info
+
+         except Exception as e:
+             print(f"Preprocessing error: {e}")
+             return None, None
+
+     def rescale_coords(self, x, y, scale_info):
+         """Convert model coordinates back to original video dimensions"""
+         try:
+             scale = scale_info["scale"]
+             pad_w = scale_info["pad_w"]
+             pad_h = scale_info["pad_h"]
+             original_h, original_w = scale_info["original_size"]
+
+             # Remove padding and scale back to original dimensions
+             x_orig = int((x - pad_w) / scale)
+             y_orig = int((y - pad_h) / scale)
+
+             # Ensure coordinates are within bounds
+             x_orig = max(0, min(x_orig, original_w - 1))
+             y_orig = max(0, min(y_orig, original_h - 1))
+
+             return (x_orig, y_orig)
+
+         except Exception as e:
+             print(f"Rescaling error: {e}")
+             return (0, 0)
+
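A worked round-trip may help here: a 1920x1080 frame is scaled by 640/1920 = 1/3 to 640x360, then padded with 140 px above and below to fill the 640x640 canvas; rescale_coords inverts exactly that mapping.

import numpy as np
from backend.preprocessing.preprocessor import FramePreprocessor

pre = FramePreprocessor(input_size=640)
frame = np.zeros((1080, 1920, 3), dtype=np.uint8)  # BGR frame, as read by cv2

normalized, scale_info = pre.preprocess_frame(frame)
print(normalized.shape)                           # (640, 640, 3)
print(scale_info["scale"])                        # 640 / 1920 = 0.333...
print(scale_info["pad_h"], scale_info["pad_w"])   # 140 0

# A model-space point maps back into the original 1920x1080 frame
print(pre.rescale_coords(320, 320, scale_info))   # (960, 540) - the frame centre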
backend/services/__init__.py ADDED
File without changes
backend/services/prediction/__init__.py ADDED
File without changes
backend/services/prediction/predictor.py ADDED
@@ -0,0 +1,72 @@
+ import numpy as np
+ from sklearn.preprocessing import MinMaxScaler
+
+
+ class ViolencePredictor:
+     def __init__(self):
+         self.scaler = MinMaxScaler()
+
+     def preprocess_data(self, df):
+         """
+         Preprocess the data by normalizing box coordinates, center coordinates,
+         distances, and keypoints, then dropping keypoint-confidence columns.
+         """
+         # Normalize box coordinates
+         frame_height = df["frame_height"]
+         frame_width = df["frame_width"]
+         df["box1_x_min"] = df["box1_x_min"] / frame_width
+         df["box1_y_min"] = df["box1_y_min"] / frame_height
+         df["box1_x_max"] = df["box1_x_max"] / frame_width
+         df["box1_y_max"] = df["box1_y_max"] / frame_height
+
+         df["box2_x_min"] = df["box2_x_min"] / frame_width
+         df["box2_y_min"] = df["box2_y_min"] / frame_height
+         df["box2_x_max"] = df["box2_x_max"] / frame_width
+         df["box2_y_max"] = df["box2_y_max"] / frame_height
+
+         # Normalize center coordinates
+         df["center1_x"] = df["center1_x"] / frame_width
+         df["center1_y"] = df["center1_y"] / frame_height
+
+         df["center2_x"] = df["center2_x"] / frame_width
+         df["center2_y"] = df["center2_y"] / frame_height
+
+         # Normalize distances by the frame diagonal
+         max_distance = np.sqrt(frame_width**2 + frame_height**2)
+         df["distance"] = df["distance"] / max_distance
+         df["relative_distance"] = df["relative_distance"] / max_distance
+
+         # Drop confidence columns
+         drop_columns = (
+             [f"person1_kp{i}_conf" for i in range(17)]
+             + [f"person2_kp{i}_conf" for i in range(17)]
+             + [f"relative_kp{i}_conf" for i in range(17)]
+         )
+
+         existing_columns = [col for col in drop_columns if col in df.columns]
+         df = df.drop(columns=existing_columns)
+
+         # Normalize keypoints
+         for i in range(17):
+             for prefix in ["person1_kp", "person2_kp", "relative_kp"]:
+                 x_col = f"{prefix}{i}_x"
+                 y_col = f"{prefix}{i}_y"
+
+                 if x_col in df.columns:
+                     df[x_col] = df[x_col] / frame_width
+                 if y_col in df.columns:
+                     df[y_col] = df[y_col] / frame_height
+
+         # Scale specific columns (note: each fit_transform refits the shared scaler)
+         df["distance"] = self.scaler.fit_transform(df[["distance"]])
+         df["relative_distance"] = self.scaler.fit_transform(df[["relative_distance"]])
+         df["motion_average_speed"] = self.scaler.fit_transform(
+             df[["motion_average_speed"]]
+         )
+         df["motion_motion_intensity"] = self.scaler.fit_transform(
+             df[["motion_motion_intensity"]]
+         )
+         return df
+
+     def predict(self, data):
+         return 0  # placeholder until a trained model is wired in
backend/services/video_data_extraction/__init__.py ADDED
File without changes
backend/services/video_data_extraction/video_preprocessor.py ADDED
@@ -0,0 +1,146 @@
+ import os
+ import cv2
+ import torch
+ import pandas as pd
+ from backend.feature_extraction.extractor import VideoFeatureExtractor
+ from backend.utils.csv_utils import _create_interaction_row
+
+
+ class VideoDataExtractor:
+     def __init__(self):
+         self.extractor = VideoFeatureExtractor()
+
+     def extract_video_data(
+         self,
+         video_path,
+         output_csv_path,
+         output_folder=None,
+         show_video=False,
+         save_video=False,
+     ):
+         """
+         Extract data from a video file.
+
+         Args:
+             video_path: Path to input video
+             output_csv_path: Path to save CSV output
+             output_folder: Folder to save output video
+             show_video: Whether to display video during processing
+             save_video: Whether to save output video
+
+         Returns:
+             Tuple of (frame_width, frame_height, num_interactions)
+         """
+         cap = None
+         video_writer = None
+         csv_data = []
+         seen_interactions = set()
+
+         try:
+             if not os.path.exists(video_path):
+                 raise FileNotFoundError(f"Video file not found: {video_path}")
+
+             cap = cv2.VideoCapture(video_path)
+             if not cap.isOpened():
+                 raise ValueError("Error: Could not open video file")
+
+             fps = cap.get(cv2.CAP_PROP_FPS)
+             frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+             frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+             total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+             video_name = os.path.splitext(os.path.basename(video_path))[0]
+
+             # Set frame skip based on resolution
+             batch_size, frame_skip = self.extractor.preprocessor.set_resolution_config(
+                 frame_width, frame_height
+             )
+             self.extractor.preprocessor.frame_skip = frame_skip
+
+             print(f"Processing video: {frame_width}x{frame_height} at {fps} fps")
+             print(f"Using frame_skip: {frame_skip}")
+
+             # Initialize video writer if needed
+             if output_folder and save_video:
+                 os.makedirs(output_folder, exist_ok=True)
+                 output_video_path = os.path.join(
+                     output_folder, f"{video_name}_detections.mp4"
+                 )
+                 video_writer = cv2.VideoWriter(
+                     output_video_path,
+                     cv2.VideoWriter_fourcc(*"mp4v"),
+                     fps / frame_skip,
+                     (frame_width, frame_height),
+                 )
+
+             # Reset extractor for new video
+             self.extractor.reset()
+
+             # Process frames
+             for frame_idx in range(0, total_frames, frame_skip):
+                 cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
+                 ret, frame = cap.read()
+                 if not ret:
+                     break
+
+                 # Extract features
+                 frame_data, annotated_frame = self.extractor.extract_features(
+                     frame, frame_idx
+                 )
+
+                 if frame_data is not None:
+                     # Process interactions
+                     for interaction in frame_data["interactions"]:
+                         interaction_id = (
+                             interaction["person1_id"],
+                             interaction["person2_id"],
+                             frame_idx,
+                         )
+
+                         if interaction_id not in seen_interactions:
+                             seen_interactions.add(interaction_id)
+                             row = _create_interaction_row(
+                                 video_name,
+                                 frame_data,
+                                 interaction,
+                                 frame_width,
+                                 frame_height,
+                             )
+                             csv_data.append(row)
+
+                 # Write frame to output video
+                 if video_writer is not None and annotated_frame is not None:
+                     video_writer.write(annotated_frame)
+
+                 # Show video if enabled
+                 if show_video and annotated_frame is not None:
+                     cv2.imshow("Video Data Extraction", annotated_frame)
+                     key = cv2.waitKey(1) & 0xFF
+                     if key == ord("q"):
+                         break
+
+                 # Clear memory periodically
+                 if frame_idx % 100 == 0:
+                     torch.cuda.empty_cache()
+
+             if csv_data:
+                 df = pd.DataFrame(csv_data)
+
+                 if os.path.exists(output_csv_path):
+                     # Append to existing CSV
+                     df.to_csv(output_csv_path, mode="a", header=False, index=False)
+                     print(f"Appended {len(csv_data)} interactions to {output_csv_path}")
+                 else:
+                     # Save new CSV
+                     df.to_csv(output_csv_path, index=False)
+                     print(f"Saved {len(csv_data)} interactions to {output_csv_path}")
+
+             return frame_width, frame_height, len(csv_data)
+
+         finally:
+             if cap is not None:
+                 cap.release()
+             if video_writer is not None:
+                 video_writer.release()
+             cv2.destroyAllWindows()
+             torch.cuda.empty_cache()
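A possible end-to-end invocation of this service, with hypothetical paths:

from backend.services.video_data_extraction.video_preprocessor import VideoDataExtractor

extractor = VideoDataExtractor()
width, height, n_rows = extractor.extract_video_data(
    video_path="videos/fight_001.mp4",       # hypothetical input clip
    output_csv_path="output/interactions.csv",
    output_folder="output",
    save_video=True,
)
print(f"{width}x{height}: wrote {n_rows} interaction rows")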
backend/utils/__init__.py ADDED
File without changes
backend/utils/csv_utils.py ADDED
@@ -0,0 +1,57 @@
+ def _create_interaction_row(
+     video_name, frame_data, interaction, frame_width, frame_height
+ ):
+     """Create a row of interaction data for CSV output."""
+     row = {
+         "video_name": video_name,
+         "frame_index": frame_data["frame_index"],
+         "timestamp": frame_data["timestamp"],
+         "frame_width": frame_width,
+         "frame_height": frame_height,
+         "person1_id": interaction["person1_id"],
+         "person2_id": interaction["person2_id"],
+         "box1_x_min": interaction["box1"][0],
+         "box1_y_min": interaction["box1"][1],
+         "box1_x_max": interaction["box1"][2],
+         "box1_y_max": interaction["box1"][3],
+         "box2_x_min": interaction["box2"][0],
+         "box2_y_min": interaction["box2"][1],
+         "box2_x_max": interaction["box2"][2],
+         "box2_y_max": interaction["box2"][3],
+         "center1_x": interaction["center1"][0],
+         "center1_y": interaction["center1"][1],
+         "center2_x": interaction["center2"][0],
+         "center2_y": interaction["center2"][1],
+         "distance": interaction["distance"],
+         "person1_idx": interaction["person1_idx"],
+         "person2_idx": interaction["person2_idx"],
+         "relative_distance": interaction["relative_distance"],
+         "motion_average_speed": frame_data["motion_features"]["average_speed"],
+         "motion_motion_intensity": frame_data["motion_features"]["motion_intensity"],
+         "motion_sudden_movements": frame_data["motion_features"]["sudden_movements"],
+     }
+
+     # Initialize all keypoint columns so every row has the same schema
+     keypoints_data = interaction["keypoints"]
+     for prefix in ["person1_kp", "person2_kp", "relative_kp"]:
+         for i in range(17):
+             for dim in ["_x", "_y", "_conf"]:
+                 row[f"{prefix}{i}{dim}"] = None
+
+     # Fill in actual keypoint values if they exist
+     if isinstance(keypoints_data, dict):
+         for person_prefix, kp_data in [
+             ("person1_kp", keypoints_data.get("person1")),
+             ("person2_kp", keypoints_data.get("person2")),
+             ("relative_kp", keypoints_data.get("relative")),
+         ]:
+             if isinstance(kp_data, list):
+                 for i, kp in enumerate(kp_data):
+                     if i >= 17:
+                         continue
+                     if isinstance(kp, (list, tuple)) and len(kp) >= 3:
+                         row[f"{person_prefix}{i}_x"] = float(kp[0])
+                         row[f"{person_prefix}{i}_y"] = float(kp[1])
+                         row[f"{person_prefix}{i}_conf"] = float(kp[2])
+
+     return row
backend/utils/gpu.py ADDED
@@ -0,0 +1,24 @@
+ import torch
+ import os
+
+
+ class GPUConfigurator:
+     def __init__(self):
+         self.device = self._setup_device()
+         self._configure_gpu()
+
+     def _setup_device(self):
+         if torch.cuda.is_available():
+             device = torch.device("cuda")
+             torch.zeros(1).to(device)
+             torch.cuda.synchronize()
+             print(f"Using GPU: {torch.cuda.get_device_name(0)}")
+             return device
+         print("No GPU available. Using CPU.")
+         return torch.device("cpu")
+
+     def _configure_gpu(self):
+         if self.device.type == "cuda":
+             torch.backends.cudnn.benchmark = True
+             torch.set_float32_matmul_precision("high")
+             os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # synchronous launches: eases debugging, costs throughput
backend/utils/id_utils.py ADDED
@@ -0,0 +1,5 @@
+ def get_new_id(counter):
+     """Return a new ID and increment counter."""
+     new_id = counter
+     counter += 1
+     return new_id, counter
backend/utils/interaction_utils.py ADDED
@@ -0,0 +1,47 @@
+ import numpy as np
+
+
+ def get_box_center(box):
+     """
+     Calculate the center of a bounding box.
+
+     box: [x1, y1, x2, y2]
+     returns: [center_x, center_y]
+     """
+     return [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2]
+
+
+ def euclidean_distance(point1, point2):
+     """
+     Compute Euclidean distance between two points.
+
+     point1, point2: [x, y]
+     returns: float
+     """
+     return float(np.linalg.norm(np.array(point1) - np.array(point2)))
+
+
+ def relative_distance(box1, box2):
+     """
+     Compute relative distance between two boxes.
+
+     Returns distance normalized by sqrt(average box area)
+     """
+     center1 = get_box_center(box1)
+     center2 = get_box_center(box2)
+     distance = euclidean_distance(center1, center2)
+
+     area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
+     area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
+     avg_area = (area1 + area2) / 2
+
+     return distance / (avg_area**0.5)
+
+
+ def relative_keypoints(pose1, pose2):
+     """
+     Compute the difference between keypoints of two people.
+
+     Returns a list of [dx, dy, dconf] per keypoint, since each keypoint is [x, y, conf].
+     """
+     return (np.array(pose2) - np.array(pose1)).tolist()
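Worked numbers for relative_distance: two 100x200 boxes (area 20000 each) whose centers sit 150 px apart give 150 / sqrt(20000) ≈ 1.06, i.e. roughly one body-size of separation regardless of image resolution.

from backend.utils.interaction_utils import (
    get_box_center, euclidean_distance, relative_distance,
)

box1 = [0, 0, 100, 200]    # area 20000
box2 = [150, 0, 250, 200]  # area 20000

print(get_box_center(box1))                       # [50.0, 100.0]
print(euclidean_distance([50, 100], [200, 100]))  # 150.0
print(round(relative_distance(box1, box2), 3))    # 150 / sqrt(20000) = 1.061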
backend/utils/iou_utils.py ADDED
@@ -0,0 +1,13 @@
+ def calculate_iou(box1, box2):
+     """Compute IoU between two boxes."""
+     xA = max(box1[0], box2[0])
+     yA = max(box1[1], box2[1])
+     xB = min(box1[2], box2[2])
+     yB = min(box1[3], box2[3])
+
+     inter_area = max(0, xB - xA) * max(0, yB - yA)
+     box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
+     box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
+     union_area = box1_area + box2_area - inter_area
+
+     return inter_area / union_area if union_area > 0 else 0
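A quick worked example: two 4x4 boxes overlapping in a 2x2 region give IoU = 4 / (16 + 16 - 4) ≈ 0.143, comfortably below the tracker's 0.3 matching threshold.

from backend.utils.iou_utils import calculate_iou

a = [0, 0, 4, 4]
b = [2, 2, 6, 6]
# intersection 2x2 = 4, union 16 + 16 - 4 = 28
print(round(calculate_iou(a, b), 3))        # 0.143
print(calculate_iou(a, [10, 10, 12, 12]))   # 0 (disjoint boxes)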
backend/utils/motion_utils.py ADDED
@@ -0,0 +1,44 @@
+ import numpy as np
+
+
+ def calc_avg_speed(prev_poses: list, current_poses: list) -> float:
+     if not prev_poses or not current_poses:
+         return 0.0
+
+     prev_poses = np.array(prev_poses)
+     current_poses = np.array(current_poses)
+
+     if prev_poses.shape != current_poses.shape:
+         return 0.0
+
+     displacement = np.linalg.norm(current_poses - prev_poses, axis=2)
+     return float(np.mean(displacement))
+
+
+ def calc_motion_intensity(prev_poses: list, current_poses: list) -> float:
+     if not prev_poses or not current_poses:
+         return 0.0
+
+     prev_poses = np.array(prev_poses)
+     current_poses = np.array(current_poses)
+
+     if prev_poses.shape != current_poses.shape:
+         return 0.0
+
+     displacement = np.linalg.norm(current_poses - prev_poses, axis=2)
+     return float(np.std(displacement))
+
+
+ def calc_sudden_movements(prev_poses: list, current_poses: list) -> int:
+     if not prev_poses or not current_poses:
+         return 0
+
+     prev_poses = np.array(prev_poses)
+     current_poses = np.array(current_poses)
+
+     if prev_poses.shape != current_poses.shape:
+         return 0
+
+     displacement = np.linalg.norm(current_poses - prev_poses, axis=2)
+     threshold = np.mean(displacement) + 2 * np.std(displacement)
+     return int(np.sum(displacement > threshold))
+
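A toy sanity check: one person whose 17 keypoints all move 3 px in x and 4 px in y yields an average speed of 5 (a 3-4-5 displacement). Note the arrays are (people, 17, 3), so the norm over the last axis also includes the confidence delta, which is zero here.

from backend.utils.motion_utils import (
    calc_avg_speed, calc_motion_intensity, calc_sudden_movements,
)

prev = [[[10.0, 10.0, 0.9]] * 17]   # one person, 17 keypoints of [x, y, conf]
curr = [[[13.0, 14.0, 0.9]] * 17]   # every keypoint shifted by (3, 4)

print(calc_avg_speed(prev, curr))         # 5.0
print(calc_motion_intensity(prev, curr))  # 0.0 (uniform motion, zero std)
print(calc_sudden_movements(prev, curr))  # 0 (nothing exceeds mean + 2*std)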
backend/utils/visualizer.py ADDED
@@ -0,0 +1,100 @@
+ import cv2
+
+
+ class Visualizer:
+     def __init__(self):
+         self.colors = {
+             "person": (0, 255, 0),      # Green
+             "keypoint": (255, 255, 0),  # Yellow
+             "connection": (0, 255, 255),  # Cyan
+         }
+
+     def rescale_coords(self, x, y, scale_info):
+         """Convert model coordinates back to original video dimensions (same letterbox inversion as FramePreprocessor)."""
+         scale = scale_info["scale"]
+         pad_w = scale_info["pad_w"]
+         pad_h = scale_info["pad_h"]
+         original_h, original_w = scale_info["original_size"]
+
+         # Remove padding and scale back to original dimensions
+         x_orig = int((x - pad_w) / scale)
+         y_orig = int((y - pad_h) / scale)
+
+         # Ensure coordinates are within bounds
+         x_orig = max(0, min(x_orig, original_w - 1))
+         y_orig = max(0, min(y_orig, original_h - 1))
+
+         return (x_orig, y_orig)
+
+     def draw_detections(self, frame, det_results, pose_results, scale_info, tracked_persons):
+         """Draw detections and poses on the frame."""
+         try:
+             display_frame = frame.copy()
+
+             # Draw person boxes with IDs first
+             for person_id, box in tracked_persons.items():
+                 try:
+                     if len(box) != 4:
+                         continue
+
+                     x1, y1, x2, y2 = map(float, box)
+                     x1, y1 = self.rescale_coords(x1, y1, scale_info)
+                     x2, y2 = self.rescale_coords(x2, y2, scale_info)
+
+                     # Ensure coordinates are valid
+                     if any(coord < 0 for coord in [x1, y1, x2, y2]):
+                         continue
+
+                     # Draw person box
+                     cv2.rectangle(display_frame,
+                                   (int(x1), int(y1)),
+                                   (int(x2), int(y2)),
+                                   self.colors["person"],
+                                   2)
+
+                     # Draw person ID
+                     id_text = f"ID:{person_id}"
+                     (text_w, text_h), _ = cv2.getTextSize(
+                         id_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2
+                     )
+                     cv2.rectangle(
+                         display_frame,
+                         (int(x2 - text_w - 5), int(y1)),
+                         (int(x2), int(y1 + text_h + 5)),
+                         self.colors["person"],
+                         -1,
+                     )
+                     cv2.putText(
+                         display_frame,
+                         id_text,
+                         (int(x2 - text_w - 2), int(y1 + text_h + 2)),
+                         cv2.FONT_HERSHEY_SIMPLEX,
+                         0.5,
+                         (0, 0, 255),
+                         1,
+                     )
+                 except Exception as e:
+                     print(f"Error drawing person ID {person_id}: {e}")
+                     continue
+
+             # Draw keypoints
+             if pose_results:
+                 for result in pose_results:
+                     if result.keypoints:
+                         for kpts in result.keypoints:
+                             try:
+                                 keypoints = kpts.data[0].cpu().numpy()
+                                 for kp in keypoints:
+                                     x, y, conf = kp
+                                     if conf > 0.5:  # Only draw keypoints with high confidence
+                                         x, y = self.rescale_coords(x, y, scale_info)
+                                         cv2.circle(display_frame, (int(x), int(y)), 3, self.colors["keypoint"], -1)
+                             except Exception as e:
+                                 print(f"Error drawing keypoints: {e}")
+                                 continue
+
+             return display_frame
+
+         except Exception as e:
+             print(f"Error in draw_detections: {e}")
+             return frame