Upload 24 files
- backend/__init__.py +0 -0
- backend/config.py +17 -0
- backend/data_extraction/__init__.py +0 -0
- backend/data_extraction/interaction_analyzer.py +118 -0
- backend/data_extraction/person_tracker.py +76 -0
- backend/feature_extraction/__init__.py +0 -0
- backend/feature_extraction/extractor.py +184 -0
- backend/models/yolov8n-pose.pt +3 -0
- backend/models/yolov8n.pt +3 -0
- backend/preprocessing/__init__.py +0 -0
- backend/preprocessing/preprocessor.py +94 -0
- backend/services/__init__.py +0 -0
- backend/services/prediction/__init__.py +0 -0
- backend/services/prediction/predictor.py +75 -0
- backend/services/video_data_extraction/__init__.py +0 -0
- backend/services/video_data_extraction/video_preprocessor.py +146 -0
- backend/utils/__init__.py +0 -0
- backend/utils/csv_utils.py +57 -0
- backend/utils/gpu.py +23 -0
- backend/utils/id_utils.py +5 -0
- backend/utils/interaction_utils.py +47 -0
- backend/utils/iou_utils.py +13 -0
- backend/utils/motion_utils.py +44 -0
- backend/utils/visualizer.py +100 -0
backend/__init__.py
ADDED
File without changes

backend/config.py
ADDED
import os

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DETECT_MODEL = os.path.join(BASE_DIR, "models", "yolov8n.pt")
POSE_MODEL = os.path.join(BASE_DIR, "models", "yolov8n-pose.pt")


# Thresholds and params
CONF_THRESHOLD = float(os.getenv("CONF_THRESHOLD", 0.3))
INACTIVE_TIMEOUT = int(os.getenv("INACTIVE_TIMEOUT", 30))
FRAME_SKIP = int(os.getenv("FRAME_SKIP", 2))
INPUT_SIZE = int(os.getenv("INPUT_SIZE", 640))

# Paths
BASE_DIR = os.path.abspath(os.path.dirname(__file__))
OUTPUT_DIR = os.getenv("OUTPUT_DIR", os.path.join(BASE_DIR, "output"))
os.makedirs(OUTPUT_DIR, exist_ok=True)
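
Every threshold above is read from the environment at import time, so callers can tune the config without editing the file. A minimal sketch (not part of the commit) of overriding two values before the module is first imported:

import os

# Must be set before backend.config is imported; os.getenv() runs at import time.
os.environ["CONF_THRESHOLD"] = "0.5"      # stricter detections
os.environ["OUTPUT_DIR"] = "/tmp/viz_out" # hypothetical output location

from backend import config

print(config.CONF_THRESHOLD)  # 0.5
print(config.OUTPUT_DIR)      # /tmp/viz_out (created if missing)
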
backend/data_extraction/__init__.py
ADDED
File without changes

backend/data_extraction/interaction_analyzer.py
ADDED
import numpy as np
from backend.utils.motion_utils import (
    calc_avg_speed,
    calc_motion_intensity,
    calc_sudden_movements,
)
from backend.utils.interaction_utils import (
    get_box_center,
    euclidean_distance,
    relative_distance,
    relative_keypoints,
)


class InteractionAnalyzer:
    """
    Analyze human motion and interactions between people based on poses and bounding boxes.
    """

    def __init__(self):
        # You can later add thresholds or state here if needed
        pass

    def calculate_motion_features(
        self,
        prev_poses: list[list[list[float]]],
        current_poses: list[list[list[float]]],
    ) -> dict:
        """
        Calculate motion features between consecutive frames.

        Args:
            prev_poses: List of keypoints for all people in the previous frame
            current_poses: List of keypoints for all people in the current frame

        Returns:
            dict: {
                "average_speed": float,
                "motion_intensity": float,
                "sudden_movements": int
            }
        """
        return {
            "average_speed": calc_avg_speed(prev_poses, current_poses),
            "motion_intensity": calc_motion_intensity(prev_poses, current_poses),
            "sudden_movements": calc_sudden_movements(prev_poses, current_poses),
        }

    def calculate_interactions(
        self,
        person_boxes: list[list[float]],
        current_poses: list[list[list[float]]],
        tracked_persons: dict,
    ) -> list[dict]:
        """
        Calculate interactions between people based on bounding boxes and keypoints.

        Args:
            person_boxes: List of bounding boxes [[x1,y1,x2,y2], ...] for each person
            current_poses: List of keypoints for each person
            tracked_persons: Dict mapping person_id -> last tracked box

        Returns:
            List of dictionaries describing interactions between people
        """
        interactions = []

        if len(person_boxes) < 2:
            return interactions

        for i in range(len(person_boxes)):
            for j in range(i + 1, len(person_boxes)):
                try:
                    # Ensure poses exist for both people
                    if i >= len(current_poses) or j >= len(current_poses):
                        continue

                    box1, box2 = person_boxes[i], person_boxes[j]
                    pose1, pose2 = current_poses[i], current_poses[j]

                    # Find person IDs
                    id1, id2 = None, None
                    for pid, tracked_box in tracked_persons.items():
                        if np.array_equal(box1, tracked_box):
                            id1 = pid
                        if np.array_equal(box2, tracked_box):
                            id2 = pid

                    if id1 is None or id2 is None:
                        continue

                    # Build interaction dictionary using utils
                    interaction = {
                        "person1_idx": i,
                        "person2_idx": j,
                        "person1_id": id1,
                        "person2_id": id2,
                        "box1": box1,
                        "box2": box2,
                        "center1": get_box_center(box1),
                        "center2": get_box_center(box2),
                        "distance": euclidean_distance(
                            get_box_center(box1), get_box_center(box2)
                        ),
                        "relative_distance": relative_distance(box1, box2),
                        "keypoints": {
                            "person1": pose1,
                            "person2": pose2,
                            "relative": relative_keypoints(pose1, pose2),
                        },
                    }
                    interactions.append(interaction)

                except Exception as e:
                    print(f"Skipping interaction {i}-{j}: {e}")
                    continue

        return interactions
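
A minimal sketch (not part of the commit) showing the expected input shapes: boxes are [x1, y1, x2, y2], poses are 17 keypoints of [x, y, conf], and `tracked_persons` must hold the same box values so the `np.array_equal` lookup succeeds:

from backend.data_extraction.interaction_analyzer import InteractionAnalyzer

analyzer = InteractionAnalyzer()

# Two people standing near each other.
boxes = [[100, 100, 200, 400], [250, 120, 350, 410]]
poses = [
    [[150.0, 150.0, 0.9]] * 17,  # person 0: 17 x [x, y, conf]
    [[300.0, 160.0, 0.8]] * 17,  # person 1
]
# Same box objects as above, keyed by tracker ID.
tracked = {0: boxes[0], 1: boxes[1]}

interactions = analyzer.calculate_interactions(boxes, poses, tracked)
print(interactions[0]["person1_id"], interactions[0]["person2_id"])  # 0 1
print(round(interactions[0]["distance"], 1))  # center-to-center distance in px
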
backend/data_extraction/person_tracker.py
ADDED
import numpy as np
from backend.utils.iou_utils import calculate_iou
from backend.utils.id_utils import get_new_id


class PersonTracker:
    """
    Tracks people across frames by assigning consistent IDs to bounding boxes.
    """

    def __init__(self, inactive_timeout=30):
        self.person_id_counter = 0
        self.tracked_persons = {}  # {id: box}
        self.inactive_persons = {}  # future use
        self.inactive_timeout = inactive_timeout

    def assign_person_ids(self, current_boxes):
        """
        Assign IDs to current frame boxes based on IoU with previous frame.

        Args:
            current_boxes (list of list): [[x1, y1, x2, y2], ...]

        Returns:
            dict: {person_id: box} for current frame
        """
        new_tracked = {}
        used_ids = set()

        if not self.tracked_persons:
            # First frame - assign new IDs to all boxes
            for box in current_boxes:
                person_id, self.person_id_counter = get_new_id(self.person_id_counter)
                new_tracked[person_id] = box
        else:
            # Convert boxes to numpy arrays
            current_boxes_np = np.array(current_boxes)
            prev_boxes_np = np.array(list(self.tracked_persons.values()))

            if len(current_boxes_np) > 0 and len(prev_boxes_np) > 0:
                # Compute IoU matrix
                iou_matrix = np.zeros((len(current_boxes_np), len(prev_boxes_np)))
                for i, curr_box in enumerate(current_boxes_np):
                    for j, prev_box in enumerate(prev_boxes_np):
                        iou_matrix[i, j] = calculate_iou(curr_box, prev_box)

                # Match boxes based on IoU > 0.3
                matched_pairs = []
                for i in range(len(current_boxes_np)):
                    max_j = np.argmax(iou_matrix[i])
                    if iou_matrix[i, max_j] > 0.3:
                        matched_pairs.append((i, max_j))

                # Assign matched IDs
                prev_ids = list(self.tracked_persons.keys())
                for i, j in matched_pairs:
                    person_id = prev_ids[j]
                    new_tracked[person_id] = current_boxes_np[i]
                    used_ids.add(person_id)

                # Assign new IDs to unmatched boxes
                for i, box in enumerate(current_boxes_np):
                    if i not in [pair[0] for pair in matched_pairs]:
                        person_id, self.person_id_counter = get_new_id(
                            self.person_id_counter
                        )
                        new_tracked[person_id] = box

        self.tracked_persons = new_tracked
        return new_tracked

    def reset(self):
        """Reset the tracker for a new video."""
        self.person_id_counter = 0
        self.tracked_persons = {}
        self.inactive_persons = {}
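
A minimal sketch (not part of the commit) of the IoU-based matching: a box that barely moves keeps its ID, while a box appearing elsewhere gets a fresh one, and an unmatched old ID simply drops out:

from backend.data_extraction.person_tracker import PersonTracker

tracker = PersonTracker()

# Frame 1: two people get IDs 0 and 1.
frame1 = tracker.assign_person_ids([[0, 0, 100, 200], [300, 0, 400, 200]])
print(sorted(frame1))  # [0, 1]

# Frame 2: the first box shifts 5 px (IoU ~ 0.9 > 0.3, so it keeps ID 0);
# a box far away matches nothing and receives the next counter value, ID 2;
# ID 1 had no match this frame and is dropped from tracked_persons.
frame2 = tracker.assign_person_ids([[5, 0, 105, 200], [600, 0, 700, 200]])
print(sorted(frame2))  # [0, 2]
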
backend/feature_extraction/__init__.py
ADDED
File without changes

backend/feature_extraction/extractor.py
ADDED
import torch
from backend.config import DETECT_MODEL, POSE_MODEL, CONF_THRESHOLD
from backend.utils.gpu import GPUConfigurator
from backend.preprocessing.preprocessor import FramePreprocessor
from backend.data_extraction.interaction_analyzer import InteractionAnalyzer
from backend.data_extraction.person_tracker import PersonTracker
from backend.utils.visualizer import Visualizer
import numpy as np
from ultralytics import YOLO


class VideoFeatureExtractor:
    def __init__(self):
        self.gpu_config = GPUConfigurator()
        self.device = self.gpu_config.device

        self.detection_model = YOLO(DETECT_MODEL).to(self.device)
        self.pose_model = YOLO(POSE_MODEL).to(self.device)

        self.preprocessor = FramePreprocessor()
        self.interaction_analyzer = InteractionAnalyzer()
        self.person_tracker = PersonTracker()
        self.visualizer = Visualizer()

        self.conf_threshold = CONF_THRESHOLD
        self.prev_poses = None

    def extract_features(self, frame, frame_idx):
        """Extract features from a frame."""
        try:
            processed_frame, scale_info = self.preprocessor.preprocess_frame(frame)
            if processed_frame is None:
                return None, frame

            frame_tensor = (
                torch.from_numpy(processed_frame)
                .permute(2, 0, 1)
                .unsqueeze(0)
                .to(self.device)
            )

            if frame_idx % 5 == 0:
                torch.cuda.empty_cache()

            # Note: autocast is hard-coded to CUDA here, matching the GPU path.
            with (
                torch.no_grad(),
                torch.amp.autocast(device_type="cuda", dtype=torch.float16),
            ):
                det_results = self.detection_model(
                    frame_tensor, conf=self.conf_threshold, verbose=False
                )
                pose_results = (
                    self.pose_model(
                        frame_tensor, conf=self.conf_threshold, verbose=False
                    )
                    if len(det_results[0].boxes) > 0
                    else []
                )

            frame_data = {
                "frame_index": frame_idx,
                "timestamp": frame_idx / 30,
                "persons": [],
                "objects": [],
                "interactions": [],
                "resized_width": scale_info.get("resized_size", (0, 0))[1],
                "resized_height": scale_info.get("resized_size", (0, 0))[0],
            }

            # Process detections
            person_boxes = []
            for result in det_results:
                for box in result.boxes:
                    try:
                        cls = result.names[int(box.cls[0])]
                        box_coords = box.xyxy[0].cpu().numpy().tolist()
                        if cls == "person":
                            person_boxes.append(box_coords)
                        else:
                            frame_data["objects"].append(
                                {
                                    "class": cls,
                                    "confidence": float(box.conf[0]),
                                    "box": box_coords,
                                }
                            )
                    except Exception as e:
                        print(f"Detection processing error: {e}")
                        continue

            # Track persons
            tracked_persons = self.person_tracker.assign_person_ids(person_boxes)

            # Process poses
            current_poses = []
            if pose_results:
                for result in pose_results:
                    if result.keypoints:
                        for kpts in result.keypoints:
                            try:
                                pose_data = kpts.data[0].cpu().numpy().tolist()
                                current_poses.append(pose_data)
                            except Exception as e:
                                print(f"Pose processing error: {e}")
                                continue

            # Match persons to poses
            frame_data["persons"] = []
            for i, box in enumerate(person_boxes):
                try:
                    pose = current_poses[i] if i < len(current_poses) else None
                    if pose is None:
                        continue

                    # Find the person ID for this box
                    person_id = None
                    for pid, tracked_box in tracked_persons.items():
                        if np.array_equal(box, tracked_box):
                            person_id = pid
                            break

                    if person_id is None:
                        continue

                    frame_data["persons"].append(
                        {
                            "person_idx": i,
                            "person_id": person_id,
                            "box": box,
                            "center": [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2],
                            "keypoints": pose,
                        }
                    )

                except Exception as e:
                    print(f"Skipping person {i} due to error: {e}")
                    continue

            # Calculate motion features
            motion_features = {
                "average_speed": 0,
                "motion_intensity": 0,
                "sudden_movements": 0,
            }

            if self.prev_poses and current_poses:
                try:
                    motion_features = (
                        self.interaction_analyzer.calculate_motion_features(
                            self.prev_poses, current_poses
                        )
                    )
                except Exception as e:
                    print(f"Motion calculation error: {e}")

            frame_data["motion_features"] = motion_features
            self.prev_poses = current_poses

            # Create interactions
            frame_data["interactions"] = (
                self.interaction_analyzer.calculate_interactions(
                    person_boxes, current_poses, tracked_persons
                )
            )

            annotated_frame = self.visualizer.draw_detections(
                frame, det_results, pose_results, scale_info, tracked_persons
            )

            return frame_data, annotated_frame

        except Exception as e:
            print(f"Frame {frame_idx} failed completely: {e}")
            return None, frame

    def reset(self):
        """Reset state for a new video."""
        self.person_tracker.reset()
        self.prev_poses = None
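
A minimal single-frame sketch (not part of the commit), assuming a CUDA-capable machine since `extract_features` hard-codes CUDA autocast; the image path is hypothetical:

import cv2
from backend.feature_extraction.extractor import VideoFeatureExtractor

extractor = VideoFeatureExtractor()
frame = cv2.imread("sample.jpg")  # hypothetical test image (BGR, as OpenCV loads)

frame_data, annotated = extractor.extract_features(frame, frame_idx=0)
if frame_data is not None:
    print(len(frame_data["persons"]), "people,",
          len(frame_data["interactions"]), "interactions")
    cv2.imwrite("annotated.jpg", annotated)
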
backend/models/yolov8n-pose.pt
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:7f80660bc2f97d664d86fc9f50fd5903af392fe332c0d603fa0dd6c78bf8844c
size 6828990
backend/models/yolov8n.pt
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:31e20dde3def09e2cf938c7be6fe23d9150bbbe503982af13345706515f2ef95
size 6534387
backend/preprocessing/__init__.py
ADDED
File without changes

backend/preprocessing/preprocessor.py
ADDED
import cv2
import numpy as np


class FramePreprocessor:
    def __init__(self, input_size=640):
        self.input_size = input_size

    def set_resolution_config(self, frame_width, frame_height):
        """Set appropriate configuration based on video resolution."""
        max_dim = max(frame_width, frame_height)

        # Adjust configuration based on resolution
        if max_dim > 2560:  # 4K
            frame_skip = 2
            batch_size = 1
        elif max_dim > 1920:  # 2K
            frame_skip = 2
            batch_size = 1
        elif max_dim > 1280:  # Full HD
            frame_skip = 1
            batch_size = 2
        else:  # HD or lower
            frame_skip = 1
            batch_size = 4

        return batch_size, frame_skip

    def preprocess_frame(self, frame):
        """Preprocess a frame while maintaining aspect ratio and handling high-res inputs."""
        try:
            # Convert BGR to RGB
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            original_h, original_w = frame_rgb.shape[:2]

            # Calculate target size maintaining aspect ratio
            scale = self.input_size / max(original_w, original_h)
            target_w = int(original_w * scale)
            target_h = int(original_h * scale)

            # Resize image
            resized = cv2.resize(
                frame_rgb, (target_w, target_h), interpolation=cv2.INTER_AREA
            )

            # Create square canvas
            canvas = np.zeros((self.input_size, self.input_size, 3), dtype=np.uint8)

            # Calculate padding
            pad_h = (self.input_size - target_h) // 2
            pad_w = (self.input_size - target_w) // 2

            # Place resized image on canvas
            canvas[pad_h : pad_h + target_h, pad_w : pad_w + target_w] = resized

            # Normalize
            normalized = canvas.astype(np.float32) / 255.0

            # Store scaling info
            scale_info = {
                "scale": scale,
                "pad_w": pad_w,
                "pad_h": pad_h,
                "original_size": (original_h, original_w),
                "resized_size": (target_h, target_w),
            }

            return normalized, scale_info

        except Exception as e:
            print(f"Preprocessing error: {e}")
            return None, None

    def rescale_coords(self, x, y, scale_info):
        """Convert model coordinates back to original video dimensions."""
        try:
            scale = scale_info["scale"]
            pad_w = scale_info["pad_w"]
            pad_h = scale_info["pad_h"]
            original_h, original_w = scale_info["original_size"]

            # Remove padding and scale back to original dimensions
            x_orig = int((x - pad_w) / scale)
            y_orig = int((y - pad_h) / scale)

            # Ensure coordinates are within bounds
            x_orig = max(0, min(x_orig, original_w - 1))
            y_orig = max(0, min(y_orig, original_h - 1))

            return (x_orig, y_orig)

        except Exception as e:
            print(f"Rescaling error: {e}")
            return (0, 0)
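
The letterbox arithmetic is easiest to check with concrete numbers. A worked sketch (not part of the commit) for a 1920x1080 frame at the default 640 input size:

import numpy as np
from backend.preprocessing.preprocessor import FramePreprocessor

pre = FramePreprocessor(input_size=640)
frame = np.zeros((1080, 1920, 3), dtype=np.uint8)  # synthetic BGR frame

normalized, info = pre.preprocess_frame(frame)
# scale = 640 / 1920 = 1/3, so the frame resizes to 640 x 360
# and gets (640 - 360) // 2 = 140 px of padding top and bottom.
print(info["resized_size"], info["pad_h"], info["pad_w"])  # (360, 640) 140 0

# Round-tripping a model-space point back to frame space:
print(pre.rescale_coords(320, 320, info))  # (960, 540), the frame center
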
backend/services/__init__.py
ADDED
File without changes

backend/services/prediction/__init__.py
ADDED
File without changes

backend/services/prediction/predictor.py
ADDED
import numpy as np
from sklearn.preprocessing import MinMaxScaler


class ViolencePredictor:
    def __init__(self):
        self.scaler = MinMaxScaler()

    def preprocess_data(self, df):
        """
        Preprocess the data: normalize box coordinates, center coordinates,
        distances, and keypoints; drop confidence columns; scale selected columns.
        """
        # Normalize box coordinates
        frame_height = df["frame_height"]
        frame_width = df["frame_width"]
        df["box1_x_min"] = df["box1_x_min"] / frame_width
        df["box1_y_min"] = df["box1_y_min"] / frame_height
        df["box1_x_max"] = df["box1_x_max"] / frame_width
        df["box1_y_max"] = df["box1_y_max"] / frame_height

        df["box2_x_min"] = df["box2_x_min"] / frame_width
        df["box2_y_min"] = df["box2_y_min"] / frame_height
        df["box2_x_max"] = df["box2_x_max"] / frame_width
        df["box2_y_max"] = df["box2_y_max"] / frame_height

        # Normalize center coordinates
        df["center1_x"] = df["center1_x"] / frame_width
        df["center1_y"] = df["center1_y"] / frame_height

        df["center2_x"] = df["center2_x"] / frame_width
        df["center2_y"] = df["center2_y"] / frame_height

        # Normalize distances
        max_distance = np.sqrt(frame_width**2 + frame_height**2)
        df["distance"] = df["distance"] / max_distance
        df["relative_distance"] = df["relative_distance"] / max_distance

        # Drop confidence columns
        drop_columns = (
            [f"person1_kp{i}_conf" for i in range(17)]
            + [f"person2_kp{i}_conf" for i in range(17)]
            + [f"relative_kp{i}_conf" for i in range(17)]
        )

        existing_columns = [col for col in drop_columns if col in df.columns]
        df = df.drop(columns=existing_columns)

        # Normalize keypoints
        for i in range(17):
            for prefix in ["person1_kp", "person2_kp", "relative_kp"]:
                x_col = f"{prefix}{i}_x"
                y_col = f"{prefix}{i}_y"

                if x_col in df.columns:
                    df[x_col] = df[x_col] / frame_width
                if y_col in df.columns:
                    df[y_col] = df[y_col] / frame_height

        # Scale specific columns
        df["distance"] = self.scaler.fit_transform(df[["distance"]])
        df["relative_distance"] = self.scaler.fit_transform(df[["relative_distance"]])
        df["motion_average_speed"] = self.scaler.fit_transform(
            df[["motion_average_speed"]]
        )
        df["motion_motion_intensity"] = self.scaler.fit_transform(
            df[["motion_motion_intensity"]]
        )
        return df

    def predict(self, data):
        # Placeholder: model inference is not implemented yet
        return 0
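
A minimal sketch (not part of the commit) of the column contract `preprocess_data` expects: the keypoint columns are optional thanks to the `in df.columns` guards, but the box, center, distance, and motion columns must be present. The values here are made up:

import pandas as pd
from backend.services.prediction.predictor import ViolencePredictor

row = {
    "frame_width": 1920, "frame_height": 1080,
    "box1_x_min": 100, "box1_y_min": 200, "box1_x_max": 300, "box1_y_max": 600,
    "box2_x_min": 400, "box2_y_min": 220, "box2_x_max": 580, "box2_y_max": 610,
    "center1_x": 200, "center1_y": 400, "center2_x": 490, "center2_y": 415,
    "distance": 290.4, "relative_distance": 1.05,
    "motion_average_speed": 3.2, "motion_motion_intensity": 1.1,
}
df = ViolencePredictor().preprocess_data(pd.DataFrame([row]))
print(df["box1_x_min"].iloc[0])  # 100 / 1920 ~ 0.052
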
backend/services/video_data_extraction/__init__.py
ADDED
File without changes

backend/services/video_data_extraction/video_preprocessor.py
ADDED
import os
import cv2
import torch
import pandas as pd
from backend.feature_extraction.extractor import VideoFeatureExtractor
from backend.utils.csv_utils import _create_interaction_row


class VideoDataExtractor:
    def __init__(self):
        self.extractor = VideoFeatureExtractor()

    def extract_video_data(
        self,
        video_path,
        output_csv_path,
        output_folder=None,
        show_video=False,
        save_video=False,
    ):
        """
        Extract data from a video file.

        Args:
            video_path: Path to input video
            output_csv_path: Path to save CSV output
            output_folder: Folder to save output video
            show_video: Whether to display video during processing
            save_video: Whether to save output video

        Returns:
            Tuple of (frame_width, frame_height, num_interactions)
        """
        cap = None
        video_writer = None
        csv_data = []
        seen_interactions = set()

        try:
            if not os.path.exists(video_path):
                raise FileNotFoundError(f"Video file not found: {video_path}")

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise ValueError("Error: Could not open video file")

            fps = cap.get(cv2.CAP_PROP_FPS)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            video_name = os.path.splitext(os.path.basename(video_path))[0]

            # Set frame skip based on resolution
            batch_size, frame_skip = self.extractor.preprocessor.set_resolution_config(
                frame_width, frame_height
            )
            self.extractor.preprocessor.frame_skip = frame_skip

            print(f"Processing video: {frame_width}x{frame_height} at {fps} fps")
            print(f"Using frame_skip: {frame_skip}")

            # Initialize video writer if needed
            if output_folder and save_video:
                os.makedirs(output_folder, exist_ok=True)
                output_video_path = os.path.join(
                    output_folder, f"{video_name}_detections.mp4"
                )
                video_writer = cv2.VideoWriter(
                    output_video_path,
                    cv2.VideoWriter_fourcc(*"mp4v"),
                    fps / frame_skip,
                    (frame_width, frame_height),
                )

            # Reset extractor for new video
            self.extractor.reset()

            # Process frames
            for frame_idx in range(0, total_frames, frame_skip):
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                ret, frame = cap.read()
                if not ret:
                    break

                # Extract features
                frame_data, annotated_frame = self.extractor.extract_features(
                    frame, frame_idx
                )

                if frame_data is not None:
                    # Process interactions
                    for interaction in frame_data["interactions"]:
                        interaction_id = (
                            interaction["person1_id"],
                            interaction["person2_id"],
                            frame_idx,
                        )

                        if interaction_id not in seen_interactions:
                            seen_interactions.add(interaction_id)
                            row = _create_interaction_row(
                                video_name,
                                frame_data,
                                interaction,
                                frame_width,
                                frame_height,
                            )
                            csv_data.append(row)

                # Write frame to output video
                if video_writer is not None and annotated_frame is not None:
                    video_writer.write(annotated_frame)

                # Show video if enabled
                if show_video and annotated_frame is not None:
                    cv2.imshow("Video Data Extraction", annotated_frame)
                    key = cv2.waitKey(1) & 0xFF
                    if key == ord("q"):
                        break

                # Clear memory periodically
                if frame_idx % 100 == 0:
                    torch.cuda.empty_cache()

            if csv_data:
                df = pd.DataFrame(csv_data)

                if os.path.exists(output_csv_path):
                    # Append to existing CSV
                    df.to_csv(output_csv_path, mode="a", header=False, index=False)
                    print(f"Appended {len(csv_data)} interactions to {output_csv_path}")
                else:
                    # Save new CSV
                    df.to_csv(output_csv_path, index=False)
                    print(f"Saved {len(csv_data)} interactions to {output_csv_path}")

            return frame_width, frame_height, len(csv_data)

        finally:
            if cap is not None:
                cap.release()
            if video_writer is not None:
                video_writer.release()
            cv2.destroyAllWindows()
            torch.cuda.empty_cache()
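
A minimal end-to-end sketch (not part of the commit); the paths are hypothetical:

from backend.services.video_data_extraction.video_preprocessor import VideoDataExtractor

extractor = VideoDataExtractor()
width, height, n_rows = extractor.extract_video_data(
    video_path="videos/clip_001.mp4",          # hypothetical input
    output_csv_path="output/interactions.csv",
    output_folder="output",
    show_video=False,
    save_video=True,
)
print(f"{width}x{height}: wrote {n_rows} interaction rows")
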
backend/utils/__init__.py
ADDED
File without changes

backend/utils/csv_utils.py
ADDED
def _create_interaction_row(
    video_name, frame_data, interaction, frame_width, frame_height
):
    """Create a row of interaction data for CSV output."""
    row = {
        "video_name": video_name,
        "frame_index": frame_data["frame_index"],
        "timestamp": frame_data["timestamp"],
        "frame_width": frame_width,
        "frame_height": frame_height,
        "person1_id": interaction["person1_id"],
        "person2_id": interaction["person2_id"],
        "box1_x_min": interaction["box1"][0],
        "box1_y_min": interaction["box1"][1],
        "box1_x_max": interaction["box1"][2],
        "box1_y_max": interaction["box1"][3],
        "box2_x_min": interaction["box2"][0],
        "box2_y_min": interaction["box2"][1],
        "box2_x_max": interaction["box2"][2],
        "box2_y_max": interaction["box2"][3],
        "center1_x": interaction["center1"][0],
        "center1_y": interaction["center1"][1],
        "center2_x": interaction["center2"][0],
        "center2_y": interaction["center2"][1],
        "distance": interaction["distance"],
        "person1_idx": interaction["person1_idx"],
        "person2_idx": interaction["person2_idx"],
        "relative_distance": interaction["relative_distance"],
        "motion_average_speed": frame_data["motion_features"]["average_speed"],
        "motion_motion_intensity": frame_data["motion_features"]["motion_intensity"],
        "motion_sudden_movements": frame_data["motion_features"]["sudden_movements"],
    }

    # Add keypoints data
    keypoints_data = interaction["keypoints"]
    for prefix in ["person1_kp", "person2_kp", "relative_kp"]:
        for i in range(17):
            for dim in ["_x", "_y", "_conf"]:
                row[f"{prefix}{i}{dim}"] = None

    # Fill in actual keypoint values if they exist
    if isinstance(keypoints_data, dict):
        for person_prefix, kp_data in [
            ("person1_kp", keypoints_data.get("person1")),
            ("person2_kp", keypoints_data.get("person2")),
            ("relative_kp", keypoints_data.get("relative")),
        ]:
            if isinstance(kp_data, list):
                for i, kp in enumerate(kp_data):
                    if i >= 17:
                        continue
                    if isinstance(kp, (list, tuple)) and len(kp) >= 3:
                        row[f"{person_prefix}{i}_x"] = float(kp[0])
                        row[f"{person_prefix}{i}_y"] = float(kp[1])
                        row[f"{person_prefix}{i}_conf"] = float(kp[2])

    return row
backend/utils/gpu.py
ADDED
import torch
import os


class GPUConfigurator:
    def __init__(self):
        self.device = self._setup_device()
        self._configure_gpu()

    def _setup_device(self):
        if torch.cuda.is_available():
            device = torch.device("cuda")
            torch.zeros(1).to(device)
            torch.cuda.synchronize()
            print(f"Using GPU: {torch.cuda.get_device_name(0)}")
            return device
        print("No GPU available. Using CPU.")
        return torch.device("cpu")

    def _configure_gpu(self):
        if self.device.type == "cuda":
            torch.backends.cudnn.benchmark = True
            torch.set_float32_matmul_precision("high")
            # Forces synchronous kernel launches: handy for debugging CUDA
            # errors, but it does slow down inference.
            os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
backend/utils/id_utils.py
ADDED
def get_new_id(counter):
    """Return a new ID and increment counter."""
    new_id = counter
    counter += 1
    return new_id, counter
backend/utils/interaction_utils.py
ADDED
import numpy as np


def get_box_center(box):
    """
    Calculate the center of a bounding box.

    box: [x1, y1, x2, y2]
    returns: [center_x, center_y]
    """
    return [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2]


def euclidean_distance(point1, point2):
    """
    Compute Euclidean distance between two points.

    point1, point2: [x, y]
    returns: float
    """
    return float(np.linalg.norm(np.array(point1) - np.array(point2)))


def relative_distance(box1, box2):
    """
    Compute relative distance between two boxes.

    Returns distance normalized by sqrt(average box area).
    """
    center1 = get_box_center(box1)
    center2 = get_box_center(box2)
    distance = euclidean_distance(center1, center2)

    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    avg_area = (area1 + area2) / 2

    return distance / (avg_area**0.5)


def relative_keypoints(pose1, pose2):
    """
    Compute the element-wise difference between keypoints of two people.

    With [x, y, conf] keypoints this returns [dx, dy, dconf] per keypoint.
    """
    return (np.array(pose2) - np.array(pose1)).tolist()
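
A worked sketch (not part of the commit) of the size-normalized distance: two 100x300 boxes whose centers are 200 px apart give 200 / sqrt(30000) ~ 1.15, i.e. roughly one "person-size" apart regardless of video resolution:

from backend.utils.interaction_utils import (
    get_box_center, euclidean_distance, relative_distance,
)

box1 = [0, 0, 100, 300]    # 100x300 person box
box2 = [200, 0, 300, 300]  # same size, 200 px to the right

print(get_box_center(box1))  # [50.0, 150.0]
print(euclidean_distance(get_box_center(box1), get_box_center(box2)))  # 200.0
print(round(relative_distance(box1, box2), 2))  # 1.15
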
backend/utils/iou_utils.py
ADDED
def calculate_iou(box1, box2):
    """Compute IoU between two boxes."""
    xA = max(box1[0], box2[0])
    yA = max(box1[1], box2[1])
    xB = min(box1[2], box2[2])
    yB = min(box1[3], box2[3])

    inter_area = max(0, xB - xA) * max(0, yB - yA)
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - inter_area

    return inter_area / union_area if union_area > 0 else 0
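
A quick numeric check (not part of the commit): two 100x100 squares offset by half a side overlap in a 50x100 strip, giving IoU = 5000 / (10000 + 10000 - 5000) = 1/3, just above the 0.3 matching threshold PersonTracker uses:

from backend.utils.iou_utils import calculate_iou

a = [0, 0, 100, 100]
b = [50, 0, 150, 100]  # shifted right by half a side

print(calculate_iou(a, b))                   # 0.333...
print(calculate_iou(a, [200, 0, 300, 100]))  # disjoint boxes -> 0
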
backend/utils/motion_utils.py
ADDED
import numpy as np


def calc_avg_speed(prev_poses: list, current_poses: list) -> float:
    if not prev_poses or not current_poses:
        return 0.0

    prev_poses = np.array(prev_poses)
    current_poses = np.array(current_poses)

    if prev_poses.shape != current_poses.shape:
        return 0.0

    displacement = np.linalg.norm(current_poses - prev_poses, axis=2)
    return float(np.mean(displacement))


def calc_motion_intensity(prev_poses: list, current_poses: list) -> float:
    if not prev_poses or not current_poses:
        return 0.0

    prev_poses = np.array(prev_poses)
    current_poses = np.array(current_poses)

    if prev_poses.shape != current_poses.shape:
        return 0.0

    displacement = np.linalg.norm(current_poses - prev_poses, axis=2)
    return float(np.std(displacement))


def calc_sudden_movements(prev_poses: list, current_poses: list) -> int:
    if not prev_poses or not current_poses:
        return 0

    prev_poses = np.array(prev_poses)
    current_poses = np.array(current_poses)

    if prev_poses.shape != current_poses.shape:
        return 0

    displacement = np.linalg.norm(current_poses - prev_poses, axis=2)
    threshold = np.mean(displacement) + 2 * np.std(displacement)
    return int(np.sum(displacement > threshold))
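
All three functions take arrays of shape (people, keypoints, 3) and require identical shapes in both frames. A small sketch (not part of the commit): one person whose 17 keypoints all shift by (3, 4) px has an average speed of exactly 5.0 and zero intensity, since every keypoint moves the same amount:

import numpy as np
from backend.utils.motion_utils import (
    calc_avg_speed, calc_motion_intensity, calc_sudden_movements,
)

# One person, 17 keypoints, each stored as [x, y, conf].
prev = [[[0.0, 0.0, 1.0]] * 17]
curr = [[[3.0, 4.0, 1.0]] * 17]  # every keypoint moves by (3, 4)

print(calc_avg_speed(prev, curr))         # 5.0 (3-4-5 triangle)
print(calc_motion_intensity(prev, curr))  # 0.0 (no variation across keypoints)
print(calc_sudden_movements(prev, curr))  # 0   (nothing exceeds mean + 2*std)
# Note: the norm runs over the whole [x, y, conf] triple, so a change in
# confidence alone would also register as motion.
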
backend/utils/visualizer.py
ADDED
import cv2
import numpy as np


class Visualizer:
    def __init__(self):
        self.colors = {
            "person": (0, 255, 0),       # Green
            "keypoint": (255, 255, 0),   # Yellow
            "connection": (0, 255, 255), # Cyan
        }

    def rescale_coords(self, x, y, scale_info):
        """Convert model coordinates back to original video dimensions."""
        scale = scale_info["scale"]
        pad_w = scale_info["pad_w"]
        pad_h = scale_info["pad_h"]
        original_h, original_w = scale_info["original_size"]

        # Remove padding and scale back to original dimensions
        x_orig = int((x - pad_w) / scale)
        y_orig = int((y - pad_h) / scale)

        # Ensure coordinates are within bounds
        x_orig = max(0, min(x_orig, original_w - 1))
        y_orig = max(0, min(y_orig, original_h - 1))

        return (x_orig, y_orig)

    def draw_detections(self, frame, det_results, pose_results, scale_info, tracked_persons):
        """Draw detections and poses on the frame."""
        try:
            display_frame = frame.copy()

            # Draw person boxes with IDs first
            for person_id, box in tracked_persons.items():
                try:
                    if len(box) != 4:
                        continue

                    x1, y1, x2, y2 = map(float, box)
                    x1, y1 = self.rescale_coords(x1, y1, scale_info)
                    x2, y2 = self.rescale_coords(x2, y2, scale_info)

                    # Ensure coordinates are valid
                    if any(coord < 0 for coord in [x1, y1, x2, y2]):
                        continue

                    # Draw person box
                    cv2.rectangle(
                        display_frame,
                        (int(x1), int(y1)),
                        (int(x2), int(y2)),
                        self.colors["person"],
                        2,
                    )

                    # Draw person ID
                    id_text = f"ID:{person_id}"
                    (text_w, text_h), _ = cv2.getTextSize(
                        id_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2
                    )
                    cv2.rectangle(
                        display_frame,
                        (int(x2 - text_w - 5), int(y1)),
                        (int(x2), int(y1 + text_h + 5)),
                        self.colors["person"],
                        -1,
                    )
                    cv2.putText(
                        display_frame,
                        id_text,
                        (int(x2 - text_w - 2), int(y1 + text_h + 2)),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (0, 0, 255),
                        1,
                    )
                except Exception as e:
                    print(f"Error drawing person ID {person_id}: {e}")
                    continue

            # Draw keypoints
            if pose_results:
                for result in pose_results:
                    if result.keypoints:
                        for kpts in result.keypoints:
                            try:
                                keypoints = kpts.data[0].cpu().numpy()
                                for kp in keypoints:
                                    x, y, conf = kp
                                    if conf > 0.5:  # Only draw keypoints with high confidence
                                        x, y = self.rescale_coords(x, y, scale_info)
                                        cv2.circle(
                                            display_frame,
                                            (int(x), int(y)),
                                            3,
                                            self.colors["keypoint"],
                                            -1,
                                        )
                            except Exception as e:
                                print(f"Error drawing keypoints: {e}")
                                continue

            return display_frame

        except Exception as e:
            print(f"Error in draw_detections: {e}")
            return frame