Upload 24 files
- backend/__init__.py +0 -0
- backend/config.py +17 -0
- backend/data_extraction/__init__.py +0 -0
- backend/data_extraction/interaction_analyzer.py +118 -0
- backend/data_extraction/person_tracker.py +76 -0
- backend/feature_extraction/__init__.py +0 -0
- backend/feature_extraction/extractor.py +184 -0
- backend/models/yolov8n-pose.pt +3 -0
- backend/models/yolov8n.pt +3 -0
- backend/preprocessing/__init__.py +0 -0
- backend/preprocessing/preprocessor.py +94 -0
- backend/services/__init__.py +0 -0
- backend/services/prediction/__init__.py +0 -0
- backend/services/prediction/predictor.py +75 -0
- backend/services/video_data_extraction/__init__.py +0 -0
- backend/services/video_data_extraction/video_preprocessor.py +146 -0
- backend/utils/__init__.py +0 -0
- backend/utils/csv_utils.py +57 -0
- backend/utils/gpu.py +23 -0
- backend/utils/id_utils.py +5 -0
- backend/utils/interaction_utils.py +47 -0
- backend/utils/iou_utils.py +13 -0
- backend/utils/motion_utils.py +44 -0
- backend/utils/visualizer.py +100 -0
backend/__init__.py
ADDED
File without changes

backend/config.py
ADDED
import os

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DETECT_MODEL = os.path.join(BASE_DIR, "models", "yolov8n.pt")
POSE_MODEL = os.path.join(BASE_DIR, "models", "yolov8n-pose.pt")


# Thresholds and params
CONF_THRESHOLD = float(os.getenv("CONF_THRESHOLD", 0.3))
INACTIVE_TIMEOUT = int(os.getenv("INACTIVE_TIMEOUT", 30))
FRAME_SKIP = int(os.getenv("FRAME_SKIP", 2))
INPUT_SIZE = int(os.getenv("INPUT_SIZE", 640))

# Paths
BASE_DIR = os.path.abspath(os.path.dirname(__file__))
OUTPUT_DIR = os.getenv("OUTPUT_DIR", os.path.join(BASE_DIR, "output"))
os.makedirs(OUTPUT_DIR, exist_ok=True)
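
Every threshold above is read from the environment at import time, so callers can tune the config without editing the file. A minimal sketch (not part of the commit) of overriding two values before the module is first imported:

import os

# Must be set before backend.config is imported; os.getenv() runs at import time.
os.environ["CONF_THRESHOLD"] = "0.5"      # stricter detections
os.environ["OUTPUT_DIR"] = "/tmp/viz_out" # hypothetical output location

from backend import config

print(config.CONF_THRESHOLD)  # 0.5
print(config.OUTPUT_DIR)      # /tmp/viz_out (created if missing)
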
backend/data_extraction/__init__.py
ADDED
File without changes

backend/data_extraction/interaction_analyzer.py
ADDED
import numpy as np
from backend.utils.motion_utils import (
    calc_avg_speed,
    calc_motion_intensity,
    calc_sudden_movements,
)
from backend.utils.interaction_utils import (
    get_box_center,
    euclidean_distance,
    relative_distance,
    relative_keypoints,
)


class InteractionAnalyzer:
    """
    Analyze human motion and interactions between people based on poses and bounding boxes.
    """

    def __init__(self):
        # You can later add thresholds or state here if needed
        pass

    def calculate_motion_features(
        self,
        prev_poses: list[list[list[float]]],
        current_poses: list[list[list[float]]],
    ) -> dict:
        """
        Calculate motion features between consecutive frames.

        Args:
            prev_poses: List of keypoints for all people in the previous frame
            current_poses: List of keypoints for all people in the current frame

        Returns:
            dict: {
                "average_speed": float,
                "motion_intensity": float,
                "sudden_movements": int
            }
        """
        return {
            "average_speed": calc_avg_speed(prev_poses, current_poses),
            "motion_intensity": calc_motion_intensity(prev_poses, current_poses),
            "sudden_movements": calc_sudden_movements(prev_poses, current_poses),
        }

    def calculate_interactions(
        self,
        person_boxes: list[list[float]],
        current_poses: list[list[list[float]]],
        tracked_persons: dict,
    ) -> list[dict]:
        """
        Calculate interactions between people based on bounding boxes and keypoints.

        Args:
            person_boxes: List of bounding boxes [[x1,y1,x2,y2], ...] for each person
            current_poses: List of keypoints for each person
            tracked_persons: Dict mapping person_id -> last tracked box

        Returns:
            List of dictionaries describing interactions between people
        """
        interactions = []

        if len(person_boxes) < 2:
            return interactions

        for i in range(len(person_boxes)):
            for j in range(i + 1, len(person_boxes)):
                try:
                    # Ensure poses exist for both people
                    if i >= len(current_poses) or j >= len(current_poses):
                        continue

                    box1, box2 = person_boxes[i], person_boxes[j]
                    pose1, pose2 = current_poses[i], current_poses[j]

                    # Find person IDs
                    id1, id2 = None, None
                    for pid, tracked_box in tracked_persons.items():
                        if np.array_equal(box1, tracked_box):
                            id1 = pid
                        if np.array_equal(box2, tracked_box):
                            id2 = pid

                    if id1 is None or id2 is None:
                        continue

                    # Build interaction dictionary using utils
                    interaction = {
                        "person1_idx": i,
                        "person2_idx": j,
                        "person1_id": id1,
                        "person2_id": id2,
                        "box1": box1,
                        "box2": box2,
                        "center1": get_box_center(box1),
                        "center2": get_box_center(box2),
                        "distance": euclidean_distance(
                            get_box_center(box1), get_box_center(box2)
                        ),
                        "relative_distance": relative_distance(box1, box2),
                        "keypoints": {
                            "person1": pose1,
                            "person2": pose2,
                            "relative": relative_keypoints(pose1, pose2),
                        },
                    }
                    interactions.append(interaction)

                except Exception as e:
                    print(f"Skipping interaction {i}-{j}: {e}")
                    continue

        return interactions
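
A minimal sketch (not part of the commit) showing the expected input shapes: boxes are [x1, y1, x2, y2], poses are 17 keypoints of [x, y, conf], and `tracked_persons` must hold the same box values so the `np.array_equal` lookup succeeds:

from backend.data_extraction.interaction_analyzer import InteractionAnalyzer

analyzer = InteractionAnalyzer()

# Two people standing near each other.
boxes = [[100, 100, 200, 400], [250, 120, 350, 410]]
poses = [
    [[150.0, 150.0, 0.9]] * 17,  # person 0: 17 x [x, y, conf]
    [[300.0, 160.0, 0.8]] * 17,  # person 1
]
# Same box objects as above, keyed by tracker ID.
tracked = {0: boxes[0], 1: boxes[1]}

interactions = analyzer.calculate_interactions(boxes, poses, tracked)
print(interactions[0]["person1_id"], interactions[0]["person2_id"])  # 0 1
print(round(interactions[0]["distance"], 1))  # center-to-center distance in px
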
backend/data_extraction/person_tracker.py
ADDED
import numpy as np
from backend.utils.iou_utils import calculate_iou
from backend.utils.id_utils import get_new_id


class PersonTracker:
    """
    Tracks people across frames by assigning consistent IDs to bounding boxes.
    """

    def __init__(self, inactive_timeout=30):
        self.person_id_counter = 0
        self.tracked_persons = {}  # {id: box}
        self.inactive_persons = {}  # future use
        self.inactive_timeout = inactive_timeout

    def assign_person_ids(self, current_boxes):
        """
        Assign IDs to current frame boxes based on IoU with previous frame.

        Args:
            current_boxes (list of list): [[x1, y1, x2, y2], ...]

        Returns:
            dict: {person_id: box} for current frame
        """
        new_tracked = {}
        used_ids = set()

        if not self.tracked_persons:
            # First frame - assign new IDs to all boxes
            for box in current_boxes:
                person_id, self.person_id_counter = get_new_id(self.person_id_counter)
                new_tracked[person_id] = box
        else:
            # Convert boxes to numpy arrays
            current_boxes_np = np.array(current_boxes)
            prev_boxes_np = np.array(list(self.tracked_persons.values()))

            if len(current_boxes_np) > 0 and len(prev_boxes_np) > 0:
                # Compute IoU matrix
                iou_matrix = np.zeros((len(current_boxes_np), len(prev_boxes_np)))
                for i, curr_box in enumerate(current_boxes_np):
                    for j, prev_box in enumerate(prev_boxes_np):
                        iou_matrix[i, j] = calculate_iou(curr_box, prev_box)

                # Match boxes based on IoU > 0.3
                matched_pairs = []
                for i in range(len(current_boxes_np)):
                    max_j = np.argmax(iou_matrix[i])
                    if iou_matrix[i, max_j] > 0.3:
                        matched_pairs.append((i, max_j))

                # Assign matched IDs
                prev_ids = list(self.tracked_persons.keys())
                for i, j in matched_pairs:
                    person_id = prev_ids[j]
                    new_tracked[person_id] = current_boxes_np[i]
                    used_ids.add(person_id)

                # Assign new IDs to unmatched boxes
                for i, box in enumerate(current_boxes_np):
                    if i not in [pair[0] for pair in matched_pairs]:
                        person_id, self.person_id_counter = get_new_id(
                            self.person_id_counter
                        )
                        new_tracked[person_id] = box

        self.tracked_persons = new_tracked
        return new_tracked

    def reset(self):
        """Reset the tracker for a new video."""
        self.person_id_counter = 0
        self.tracked_persons = {}
        self.inactive_persons = {}
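
A minimal sketch (not part of the commit) of the IoU-based matching: a box that barely moves keeps its ID, while a box appearing elsewhere gets a fresh one, and an unmatched old ID simply drops out:

from backend.data_extraction.person_tracker import PersonTracker

tracker = PersonTracker()

# Frame 1: two people get IDs 0 and 1.
frame1 = tracker.assign_person_ids([[0, 0, 100, 200], [300, 0, 400, 200]])
print(sorted(frame1))  # [0, 1]

# Frame 2: the first box shifts 5 px (IoU ~ 0.9 > 0.3, so it keeps ID 0);
# a box far away matches nothing and receives the next counter value, ID 2;
# ID 1 had no match this frame and is dropped from tracked_persons.
frame2 = tracker.assign_person_ids([[5, 0, 105, 200], [600, 0, 700, 200]])
print(sorted(frame2))  # [0, 2]
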
backend/feature_extraction/__init__.py
ADDED
File without changes

backend/feature_extraction/extractor.py
ADDED
import torch
from backend.config import DETECT_MODEL, POSE_MODEL, CONF_THRESHOLD
from backend.utils.gpu import GPUConfigurator
from backend.preprocessing.preprocessor import FramePreprocessor
from backend.data_extraction.interaction_analyzer import InteractionAnalyzer
from backend.data_extraction.person_tracker import PersonTracker
from backend.utils.visualizer import Visualizer
import numpy as np
from ultralytics import YOLO


class VideoFeatureExtractor:
    def __init__(self):
        self.gpu_config = GPUConfigurator()
        self.device = self.gpu_config.device

        self.detection_model = YOLO(DETECT_MODEL).to(self.device)
        self.pose_model = YOLO(POSE_MODEL).to(self.device)

        self.preprocessor = FramePreprocessor()
        self.interaction_analyzer = InteractionAnalyzer()
        self.person_tracker = PersonTracker()
        self.visualizer = Visualizer()

        self.conf_threshold = CONF_THRESHOLD
        self.prev_poses = None

    def extract_features(self, frame, frame_idx):
        """Extract features from a frame."""
        try:
            processed_frame, scale_info = self.preprocessor.preprocess_frame(frame)
            if processed_frame is None:
                return None, frame

            frame_tensor = (
                torch.from_numpy(processed_frame)
                .permute(2, 0, 1)
                .unsqueeze(0)
                .to(self.device)
            )

            if frame_idx % 5 == 0:
                torch.cuda.empty_cache()

            # Note: autocast is hard-coded to CUDA here, matching the GPU path.
            with (
                torch.no_grad(),
                torch.amp.autocast(device_type="cuda", dtype=torch.float16),
            ):
                det_results = self.detection_model(
                    frame_tensor, conf=self.conf_threshold, verbose=False
                )
                pose_results = (
                    self.pose_model(
                        frame_tensor, conf=self.conf_threshold, verbose=False
                    )
                    if len(det_results[0].boxes) > 0
                    else []
                )

            frame_data = {
                "frame_index": frame_idx,
                "timestamp": frame_idx / 30,
                "persons": [],
                "objects": [],
                "interactions": [],
                "resized_width": scale_info.get("resized_size", (0, 0))[1],
                "resized_height": scale_info.get("resized_size", (0, 0))[0],
            }

            # Process detections
            person_boxes = []
            for result in det_results:
                for box in result.boxes:
                    try:
                        cls = result.names[int(box.cls[0])]
                        box_coords = box.xyxy[0].cpu().numpy().tolist()
                        if cls == "person":
                            person_boxes.append(box_coords)
                        else:
                            frame_data["objects"].append(
                                {
                                    "class": cls,
                                    "confidence": float(box.conf[0]),
                                    "box": box_coords,
                                }
                            )
                    except Exception as e:
                        print(f"Detection processing error: {e}")
                        continue

            # Track persons
            tracked_persons = self.person_tracker.assign_person_ids(person_boxes)

            # Process poses
            current_poses = []
            if pose_results:
                for result in pose_results:
                    if result.keypoints:
                        for kpts in result.keypoints:
                            try:
                                pose_data = kpts.data[0].cpu().numpy().tolist()
                                current_poses.append(pose_data)
                            except Exception as e:
                                print(f"Pose processing error: {e}")
                                continue

            # Match persons to poses
            frame_data["persons"] = []
            for i, box in enumerate(person_boxes):
                try:
                    pose = current_poses[i] if i < len(current_poses) else None
                    if pose is None:
                        continue

                    # Find the person ID for this box
                    person_id = None
                    for pid, tracked_box in tracked_persons.items():
                        if np.array_equal(box, tracked_box):
                            person_id = pid
                            break

                    if person_id is None:
                        continue

                    frame_data["persons"].append(
                        {
                            "person_idx": i,
                            "person_id": person_id,
                            "box": box,
                            "center": [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2],
                            "keypoints": pose,
                        }
                    )

                except Exception as e:
                    print(f"Skipping person {i} due to error: {e}")
                    continue

            # Calculate motion features
            motion_features = {
                "average_speed": 0,
                "motion_intensity": 0,
                "sudden_movements": 0,
            }

            if self.prev_poses and current_poses:
                try:
                    motion_features = (
                        self.interaction_analyzer.calculate_motion_features(
                            self.prev_poses, current_poses
                        )
                    )
                except Exception as e:
                    print(f"Motion calculation error: {e}")

            frame_data["motion_features"] = motion_features
            self.prev_poses = current_poses

            # Create interactions
            frame_data["interactions"] = (
                self.interaction_analyzer.calculate_interactions(
                    person_boxes, current_poses, tracked_persons
                )
            )

            annotated_frame = self.visualizer.draw_detections(
                frame, det_results, pose_results, scale_info, tracked_persons
            )

            return frame_data, annotated_frame

        except Exception as e:
            print(f"Frame {frame_idx} failed completely: {e}")
            return None, frame

    def reset(self):
        """Reset state for a new video."""
        self.person_tracker.reset()
        self.prev_poses = None
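
A minimal single-frame sketch (not part of the commit), assuming a CUDA-capable machine since `extract_features` hard-codes CUDA autocast; the image path is hypothetical:

import cv2
from backend.feature_extraction.extractor import VideoFeatureExtractor

extractor = VideoFeatureExtractor()
frame = cv2.imread("sample.jpg")  # hypothetical test image (BGR, as OpenCV loads)

frame_data, annotated = extractor.extract_features(frame, frame_idx=0)
if frame_data is not None:
    print(len(frame_data["persons"]), "people,",
          len(frame_data["interactions"]), "interactions")
    cv2.imwrite("annotated.jpg", annotated)
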
backend/models/yolov8n-pose.pt
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:7f80660bc2f97d664d86fc9f50fd5903af392fe332c0d603fa0dd6c78bf8844c
size 6828990
backend/models/yolov8n.pt
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:31e20dde3def09e2cf938c7be6fe23d9150bbbe503982af13345706515f2ef95
size 6534387
backend/preprocessing/__init__.py
ADDED
File without changes

backend/preprocessing/preprocessor.py
ADDED
import cv2
import numpy as np


class FramePreprocessor:
    def __init__(self, input_size=640):
        self.input_size = input_size

    def set_resolution_config(self, frame_width, frame_height):
        """Set appropriate configuration based on video resolution."""
        max_dim = max(frame_width, frame_height)

        # Adjust configuration based on resolution
        if max_dim > 2560:  # 4K
            frame_skip = 2
            batch_size = 1
        elif max_dim > 1920:  # 2K
            frame_skip = 2
            batch_size = 1
        elif max_dim > 1280:  # Full HD
            frame_skip = 1
            batch_size = 2
        else:  # HD or lower
            frame_skip = 1
            batch_size = 4

        return batch_size, frame_skip

    def preprocess_frame(self, frame):
        """Preprocess a frame while maintaining aspect ratio and handling high-res inputs."""
        try:
            # Convert BGR to RGB
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            original_h, original_w = frame_rgb.shape[:2]

            # Calculate target size maintaining aspect ratio
            scale = self.input_size / max(original_w, original_h)
            target_w = int(original_w * scale)
            target_h = int(original_h * scale)

            # Resize image
            resized = cv2.resize(
                frame_rgb, (target_w, target_h), interpolation=cv2.INTER_AREA
            )

            # Create square canvas
            canvas = np.zeros((self.input_size, self.input_size, 3), dtype=np.uint8)

            # Calculate padding
            pad_h = (self.input_size - target_h) // 2
            pad_w = (self.input_size - target_w) // 2

            # Place resized image on canvas
            canvas[pad_h : pad_h + target_h, pad_w : pad_w + target_w] = resized

            # Normalize
            normalized = canvas.astype(np.float32) / 255.0

            # Store scaling info
            scale_info = {
                "scale": scale,
                "pad_w": pad_w,
                "pad_h": pad_h,
                "original_size": (original_h, original_w),
                "resized_size": (target_h, target_w),
            }

            return normalized, scale_info

        except Exception as e:
            print(f"Preprocessing error: {e}")
            return None, None

    def rescale_coords(self, x, y, scale_info):
        """Convert model coordinates back to original video dimensions."""
        try:
            scale = scale_info["scale"]
            pad_w = scale_info["pad_w"]
            pad_h = scale_info["pad_h"]
            original_h, original_w = scale_info["original_size"]

            # Remove padding and scale back to original dimensions
            x_orig = int((x - pad_w) / scale)
            y_orig = int((y - pad_h) / scale)

            # Ensure coordinates are within bounds
            x_orig = max(0, min(x_orig, original_w - 1))
            y_orig = max(0, min(y_orig, original_h - 1))

            return (x_orig, y_orig)

        except Exception as e:
            print(f"Rescaling error: {e}")
            return (0, 0)
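
The letterbox arithmetic is easiest to check with concrete numbers. A worked sketch (not part of the commit) for a 1920x1080 frame at the default 640 input size:

import numpy as np
from backend.preprocessing.preprocessor import FramePreprocessor

pre = FramePreprocessor(input_size=640)
frame = np.zeros((1080, 1920, 3), dtype=np.uint8)  # synthetic BGR frame

normalized, info = pre.preprocess_frame(frame)
# scale = 640 / 1920 = 1/3, so the frame resizes to 640 x 360
# and gets (640 - 360) // 2 = 140 px of padding top and bottom.
print(info["resized_size"], info["pad_h"], info["pad_w"])  # (360, 640) 140 0

# Round-tripping a model-space point back to frame space:
print(pre.rescale_coords(320, 320, info))  # (960, 540), the frame center
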
backend/services/__init__.py
ADDED
File without changes

backend/services/prediction/__init__.py
ADDED
File without changes

backend/services/prediction/predictor.py
ADDED
import numpy as np
from sklearn.preprocessing import MinMaxScaler


class ViolencePredictor:
    def __init__(self):
        self.scaler = MinMaxScaler()

    def preprocess_data(self, df):
        """
        Preprocess the data: normalize box coordinates, center coordinates,
        distances, and keypoints; drop confidence columns; scale selected columns.
        """
        # Normalize box coordinates
        frame_height = df["frame_height"]
        frame_width = df["frame_width"]
        df["box1_x_min"] = df["box1_x_min"] / frame_width
        df["box1_y_min"] = df["box1_y_min"] / frame_height
        df["box1_x_max"] = df["box1_x_max"] / frame_width
        df["box1_y_max"] = df["box1_y_max"] / frame_height

        df["box2_x_min"] = df["box2_x_min"] / frame_width
        df["box2_y_min"] = df["box2_y_min"] / frame_height
        df["box2_x_max"] = df["box2_x_max"] / frame_width
        df["box2_y_max"] = df["box2_y_max"] / frame_height

        # Normalize center coordinates
        df["center1_x"] = df["center1_x"] / frame_width
        df["center1_y"] = df["center1_y"] / frame_height

        df["center2_x"] = df["center2_x"] / frame_width
        df["center2_y"] = df["center2_y"] / frame_height

        # Normalize distances
        max_distance = np.sqrt(frame_width**2 + frame_height**2)
        df["distance"] = df["distance"] / max_distance
        df["relative_distance"] = df["relative_distance"] / max_distance

        # Drop confidence columns
        drop_columns = (
            [f"person1_kp{i}_conf" for i in range(17)]
            + [f"person2_kp{i}_conf" for i in range(17)]
            + [f"relative_kp{i}_conf" for i in range(17)]
        )

        existing_columns = [col for col in drop_columns if col in df.columns]
        df = df.drop(columns=existing_columns)

        # Normalize keypoints
        for i in range(17):
            for prefix in ["person1_kp", "person2_kp", "relative_kp"]:
                x_col = f"{prefix}{i}_x"
                y_col = f"{prefix}{i}_y"

                if x_col in df.columns:
                    df[x_col] = df[x_col] / frame_width
                if y_col in df.columns:
                    df[y_col] = df[y_col] / frame_height

        # Scale specific columns
        df["distance"] = self.scaler.fit_transform(df[["distance"]])
        df["relative_distance"] = self.scaler.fit_transform(df[["relative_distance"]])
        df["motion_average_speed"] = self.scaler.fit_transform(
            df[["motion_average_speed"]]
        )
        df["motion_motion_intensity"] = self.scaler.fit_transform(
            df[["motion_motion_intensity"]]
        )
        return df

    def predict(self, data):
        # Placeholder: model inference is not implemented yet
        return 0
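
A minimal sketch (not part of the commit) of the column contract `preprocess_data` expects: the keypoint columns are optional thanks to the `in df.columns` guards, but the box, center, distance, and motion columns must be present. The values here are made up:

import pandas as pd
from backend.services.prediction.predictor import ViolencePredictor

row = {
    "frame_width": 1920, "frame_height": 1080,
    "box1_x_min": 100, "box1_y_min": 200, "box1_x_max": 300, "box1_y_max": 600,
    "box2_x_min": 400, "box2_y_min": 220, "box2_x_max": 580, "box2_y_max": 610,
    "center1_x": 200, "center1_y": 400, "center2_x": 490, "center2_y": 415,
    "distance": 290.4, "relative_distance": 1.05,
    "motion_average_speed": 3.2, "motion_motion_intensity": 1.1,
}
df = ViolencePredictor().preprocess_data(pd.DataFrame([row]))
print(df["box1_x_min"].iloc[0])  # 100 / 1920 ~ 0.052
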
backend/services/video_data_extraction/__init__.py
ADDED
File without changes

backend/services/video_data_extraction/video_preprocessor.py
ADDED
import os
import cv2
import torch
import pandas as pd
from backend.feature_extraction.extractor import VideoFeatureExtractor
from backend.utils.csv_utils import _create_interaction_row


class VideoDataExtractor:
    def __init__(self):
        self.extractor = VideoFeatureExtractor()

    def extract_video_data(
        self,
        video_path,
        output_csv_path,
        output_folder=None,
        show_video=False,
        save_video=False,
    ):
        """
        Extract data from a video file.

        Args:
            video_path: Path to input video
            output_csv_path: Path to save CSV output
            output_folder: Folder to save output video
            show_video: Whether to display video during processing
            save_video: Whether to save output video

        Returns:
            Tuple of (frame_width, frame_height, num_interactions)
        """
        cap = None
        video_writer = None
        csv_data = []
        seen_interactions = set()

        try:
            if not os.path.exists(video_path):
                raise FileNotFoundError(f"Video file not found: {video_path}")

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise ValueError("Error: Could not open video file")

            fps = cap.get(cv2.CAP_PROP_FPS)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            video_name = os.path.splitext(os.path.basename(video_path))[0]

            # Set frame skip based on resolution
            batch_size, frame_skip = self.extractor.preprocessor.set_resolution_config(
                frame_width, frame_height
            )
            self.extractor.preprocessor.frame_skip = frame_skip

            print(f"Processing video: {frame_width}x{frame_height} at {fps} fps")
            print(f"Using frame_skip: {frame_skip}")

            # Initialize video writer if needed
            if output_folder and save_video:
                os.makedirs(output_folder, exist_ok=True)
                output_video_path = os.path.join(
                    output_folder, f"{video_name}_detections.mp4"
                )
                video_writer = cv2.VideoWriter(
                    output_video_path,
                    cv2.VideoWriter_fourcc(*"mp4v"),
                    fps / frame_skip,
                    (frame_width, frame_height),
                )

            # Reset extractor for new video
            self.extractor.reset()

            # Process frames
            for frame_idx in range(0, total_frames, frame_skip):
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                ret, frame = cap.read()
                if not ret:
                    break

                # Extract features
                frame_data, annotated_frame = self.extractor.extract_features(
                    frame, frame_idx
                )

                if frame_data is not None:
                    # Process interactions
                    for interaction in frame_data["interactions"]:
                        interaction_id = (
                            interaction["person1_id"],
                            interaction["person2_id"],
                            frame_idx,
                        )

                        if interaction_id not in seen_interactions:
                            seen_interactions.add(interaction_id)
                            row = _create_interaction_row(
                                video_name,
                                frame_data,
                                interaction,
                                frame_width,
                                frame_height,
                            )
                            csv_data.append(row)

                # Write frame to output video
                if video_writer is not None and annotated_frame is not None:
                    video_writer.write(annotated_frame)

                # Show video if enabled
                if show_video and annotated_frame is not None:
                    cv2.imshow("Video Data Extraction", annotated_frame)
                    key = cv2.waitKey(1) & 0xFF
                    if key == ord("q"):
                        break

                # Clear memory periodically
                if frame_idx % 100 == 0:
                    torch.cuda.empty_cache()

            if csv_data:
                df = pd.DataFrame(csv_data)

                if os.path.exists(output_csv_path):
                    # Append to existing CSV
                    df.to_csv(output_csv_path, mode="a", header=False, index=False)
                    print(f"Appended {len(csv_data)} interactions to {output_csv_path}")
                else:
                    # Save new CSV
                    df.to_csv(output_csv_path, index=False)
                    print(f"Saved {len(csv_data)} interactions to {output_csv_path}")

            return frame_width, frame_height, len(csv_data)

        finally:
            if cap is not None:
                cap.release()
            if video_writer is not None:
                video_writer.release()
            cv2.destroyAllWindows()
            torch.cuda.empty_cache()
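
A minimal end-to-end sketch (not part of the commit); the paths are hypothetical:

from backend.services.video_data_extraction.video_preprocessor import VideoDataExtractor

extractor = VideoDataExtractor()
width, height, n_rows = extractor.extract_video_data(
    video_path="videos/clip_001.mp4",          # hypothetical input
    output_csv_path="output/interactions.csv",
    output_folder="output",
    show_video=False,
    save_video=True,
)
print(f"{width}x{height}: wrote {n_rows} interaction rows")
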
backend/utils/__init__.py
ADDED
File without changes

backend/utils/csv_utils.py
ADDED
def _create_interaction_row(
    video_name, frame_data, interaction, frame_width, frame_height
):
    """Create a row of interaction data for CSV output."""
    row = {
        "video_name": video_name,
        "frame_index": frame_data["frame_index"],
        "timestamp": frame_data["timestamp"],
        "frame_width": frame_width,
        "frame_height": frame_height,
        "person1_id": interaction["person1_id"],
        "person2_id": interaction["person2_id"],
        "box1_x_min": interaction["box1"][0],
        "box1_y_min": interaction["box1"][1],
        "box1_x_max": interaction["box1"][2],
        "box1_y_max": interaction["box1"][3],
        "box2_x_min": interaction["box2"][0],
        "box2_y_min": interaction["box2"][1],
        "box2_x_max": interaction["box2"][2],
        "box2_y_max": interaction["box2"][3],
        "center1_x": interaction["center1"][0],
        "center1_y": interaction["center1"][1],
        "center2_x": interaction["center2"][0],
        "center2_y": interaction["center2"][1],
        "distance": interaction["distance"],
        "person1_idx": interaction["person1_idx"],
        "person2_idx": interaction["person2_idx"],
        "relative_distance": interaction["relative_distance"],
        "motion_average_speed": frame_data["motion_features"]["average_speed"],
        "motion_motion_intensity": frame_data["motion_features"]["motion_intensity"],
        "motion_sudden_movements": frame_data["motion_features"]["sudden_movements"],
    }

    # Add keypoints data
    keypoints_data = interaction["keypoints"]
    for prefix in ["person1_kp", "person2_kp", "relative_kp"]:
        for i in range(17):
            for dim in ["_x", "_y", "_conf"]:
                row[f"{prefix}{i}{dim}"] = None

    # Fill in actual keypoint values if they exist
    if isinstance(keypoints_data, dict):
        for person_prefix, kp_data in [
            ("person1_kp", keypoints_data.get("person1")),
            ("person2_kp", keypoints_data.get("person2")),
            ("relative_kp", keypoints_data.get("relative")),
        ]:
            if isinstance(kp_data, list):
                for i, kp in enumerate(kp_data):
                    if i >= 17:
                        continue
                    if isinstance(kp, (list, tuple)) and len(kp) >= 3:
                        row[f"{person_prefix}{i}_x"] = float(kp[0])
                        row[f"{person_prefix}{i}_y"] = float(kp[1])
                        row[f"{person_prefix}{i}_conf"] = float(kp[2])

    return row
backend/utils/gpu.py
ADDED
import torch
import os


class GPUConfigurator:
    def __init__(self):
        self.device = self._setup_device()
        self._configure_gpu()

    def _setup_device(self):
        if torch.cuda.is_available():
            device = torch.device("cuda")
            torch.zeros(1).to(device)
            torch.cuda.synchronize()
            print(f"Using GPU: {torch.cuda.get_device_name(0)}")
            return device
        print("No GPU available. Using CPU.")
        return torch.device("cpu")

    def _configure_gpu(self):
        if self.device.type == "cuda":
            torch.backends.cudnn.benchmark = True
            torch.set_float32_matmul_precision("high")
            # Forces synchronous kernel launches: handy for debugging CUDA
            # errors, but it does slow down inference.
            os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
backend/utils/id_utils.py
ADDED
def get_new_id(counter):
    """Return a new ID and increment counter."""
    new_id = counter
    counter += 1
    return new_id, counter
backend/utils/interaction_utils.py
ADDED
import numpy as np


def get_box_center(box):
    """
    Calculate the center of a bounding box.

    box: [x1, y1, x2, y2]
    returns: [center_x, center_y]
    """
    return [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2]


def euclidean_distance(point1, point2):
    """
    Compute Euclidean distance between two points.

    point1, point2: [x, y]
    returns: float
    """
    return float(np.linalg.norm(np.array(point1) - np.array(point2)))


def relative_distance(box1, box2):
    """
    Compute relative distance between two boxes.

    Returns distance normalized by sqrt(average box area).
    """
    center1 = get_box_center(box1)
    center2 = get_box_center(box2)
    distance = euclidean_distance(center1, center2)

    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    avg_area = (area1 + area2) / 2

    return distance / (avg_area**0.5)


def relative_keypoints(pose1, pose2):
    """
    Compute the element-wise difference between keypoints of two people.

    With [x, y, conf] keypoints this returns [dx, dy, dconf] per keypoint.
    """
    return (np.array(pose2) - np.array(pose1)).tolist()
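
A worked sketch (not part of the commit) of the size-normalized distance: two 100x300 boxes whose centers are 200 px apart give 200 / sqrt(30000) ~ 1.15, i.e. roughly one "person-size" apart regardless of video resolution:

from backend.utils.interaction_utils import (
    get_box_center, euclidean_distance, relative_distance,
)

box1 = [0, 0, 100, 300]    # 100x300 person box
box2 = [200, 0, 300, 300]  # same size, 200 px to the right

print(get_box_center(box1))  # [50.0, 150.0]
print(euclidean_distance(get_box_center(box1), get_box_center(box2)))  # 200.0
print(round(relative_distance(box1, box2), 2))  # 1.15
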
backend/utils/iou_utils.py
ADDED
def calculate_iou(box1, box2):
    """Compute IoU between two boxes."""
    xA = max(box1[0], box2[0])
    yA = max(box1[1], box2[1])
    xB = min(box1[2], box2[2])
    yB = min(box1[3], box2[3])

    inter_area = max(0, xB - xA) * max(0, yB - yA)
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - inter_area

    return inter_area / union_area if union_area > 0 else 0
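
A quick numeric check (not part of the commit): two 100x100 squares offset by half a side overlap in a 50x100 strip, giving IoU = 5000 / (10000 + 10000 - 5000) = 1/3, just above the 0.3 matching threshold PersonTracker uses:

from backend.utils.iou_utils import calculate_iou

a = [0, 0, 100, 100]
b = [50, 0, 150, 100]  # shifted right by half a side

print(calculate_iou(a, b))                   # 0.333...
print(calculate_iou(a, [200, 0, 300, 100]))  # disjoint boxes -> 0
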
backend/utils/motion_utils.py
ADDED
import numpy as np


def calc_avg_speed(prev_poses: list, current_poses: list) -> float:
    if not prev_poses or not current_poses:
        return 0.0

    prev_poses = np.array(prev_poses)
    current_poses = np.array(current_poses)

    if prev_poses.shape != current_poses.shape:
        return 0.0

    displacement = np.linalg.norm(current_poses - prev_poses, axis=2)
    return float(np.mean(displacement))


def calc_motion_intensity(prev_poses: list, current_poses: list) -> float:
    if not prev_poses or not current_poses:
        return 0.0

    prev_poses = np.array(prev_poses)
    current_poses = np.array(current_poses)

    if prev_poses.shape != current_poses.shape:
        return 0.0

    displacement = np.linalg.norm(current_poses - prev_poses, axis=2)
    return float(np.std(displacement))


def calc_sudden_movements(prev_poses: list, current_poses: list) -> int:
    if not prev_poses or not current_poses:
        return 0

    prev_poses = np.array(prev_poses)
    current_poses = np.array(current_poses)

    if prev_poses.shape != current_poses.shape:
        return 0

    displacement = np.linalg.norm(current_poses - prev_poses, axis=2)
    threshold = np.mean(displacement) + 2 * np.std(displacement)
    return int(np.sum(displacement > threshold))
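
All three functions take arrays of shape (people, keypoints, 3) and require identical shapes in both frames. A small sketch (not part of the commit): one person whose 17 keypoints all shift by (3, 4) px has an average speed of exactly 5.0 and zero intensity, since every keypoint moves the same amount:

import numpy as np
from backend.utils.motion_utils import (
    calc_avg_speed, calc_motion_intensity, calc_sudden_movements,
)

# One person, 17 keypoints, each stored as [x, y, conf].
prev = [[[0.0, 0.0, 1.0]] * 17]
curr = [[[3.0, 4.0, 1.0]] * 17]  # every keypoint moves by (3, 4)

print(calc_avg_speed(prev, curr))         # 5.0 (3-4-5 triangle)
print(calc_motion_intensity(prev, curr))  # 0.0 (no variation across keypoints)
print(calc_sudden_movements(prev, curr))  # 0   (nothing exceeds mean + 2*std)
# Note: the norm runs over the whole [x, y, conf] triple, so a change in
# confidence alone would also register as motion.
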
backend/utils/visualizer.py
ADDED
import cv2
import numpy as np


class Visualizer:
    def __init__(self):
        self.colors = {
            "person": (0, 255, 0),       # Green
            "keypoint": (255, 255, 0),   # Yellow
            "connection": (0, 255, 255), # Cyan
        }

    def rescale_coords(self, x, y, scale_info):
        """Convert model coordinates back to original video dimensions."""
        scale = scale_info["scale"]
        pad_w = scale_info["pad_w"]
        pad_h = scale_info["pad_h"]
        original_h, original_w = scale_info["original_size"]

        # Remove padding and scale back to original dimensions
        x_orig = int((x - pad_w) / scale)
        y_orig = int((y - pad_h) / scale)

        # Ensure coordinates are within bounds
        x_orig = max(0, min(x_orig, original_w - 1))
        y_orig = max(0, min(y_orig, original_h - 1))

        return (x_orig, y_orig)

    def draw_detections(self, frame, det_results, pose_results, scale_info, tracked_persons):
        """Draw detections and poses on the frame."""
        try:
            display_frame = frame.copy()

            # Draw person boxes with IDs first
            for person_id, box in tracked_persons.items():
                try:
                    if len(box) != 4:
                        continue

                    x1, y1, x2, y2 = map(float, box)
                    x1, y1 = self.rescale_coords(x1, y1, scale_info)
                    x2, y2 = self.rescale_coords(x2, y2, scale_info)

                    # Ensure coordinates are valid
                    if any(coord < 0 for coord in [x1, y1, x2, y2]):
                        continue

                    # Draw person box
                    cv2.rectangle(
                        display_frame,
                        (int(x1), int(y1)),
                        (int(x2), int(y2)),
                        self.colors["person"],
                        2,
                    )

                    # Draw person ID
                    id_text = f"ID:{person_id}"
                    (text_w, text_h), _ = cv2.getTextSize(
                        id_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2
                    )
                    cv2.rectangle(
                        display_frame,
                        (int(x2 - text_w - 5), int(y1)),
                        (int(x2), int(y1 + text_h + 5)),
                        self.colors["person"],
                        -1,
                    )
                    cv2.putText(
                        display_frame,
                        id_text,
                        (int(x2 - text_w - 2), int(y1 + text_h + 2)),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (0, 0, 255),
                        1,
                    )
                except Exception as e:
                    print(f"Error drawing person ID {person_id}: {e}")
                    continue

            # Draw keypoints
            if pose_results:
                for result in pose_results:
                    if result.keypoints:
                        for kpts in result.keypoints:
                            try:
                                keypoints = kpts.data[0].cpu().numpy()
                                for kp in keypoints:
                                    x, y, conf = kp
                                    if conf > 0.5:  # Only draw keypoints with high confidence
                                        x, y = self.rescale_coords(x, y, scale_info)
                                        cv2.circle(
                                            display_frame,
                                            (int(x), int(y)),
                                            3,
                                            self.colors["keypoint"],
                                            -1,
                                        )
                            except Exception as e:
                                print(f"Error drawing keypoints: {e}")
                                continue

            return display_frame

        except Exception as e:
            print(f"Error in draw_detections: {e}")
            return frame