Spaces:

phitran
/

viralplay

Sleeping

App Files Files Community

phitran committed on Feb 9, 2025

Commit

041f44a

1 Parent(s): 58ae0fd

initial commit

Browse files

Files changed (24) hide show

.idea/.gitignore +3 -0
.idea/aws.xml +11 -0
.idea/inspectionProfiles/Project_Default.xml +23 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/misc.xml +7 -0
.idea/modules.xml +8 -0
.idea/vcs.xml +6 -0
.idea/viralplay.iml +14 -0
app.py +90 -0
handlers/.DS_Store +0 -0
handlers/__init__.py +0 -0
handlers/__pycache__/__init__.cpython-311.pyc +0 -0
handlers/__pycache__/frame_handler_resnet.cpython-311.pyc +0 -0
handlers/__pycache__/frame_handler_yolo.cpython-311.pyc +0 -0
handlers/__pycache__/video_handler.cpython-311.pyc +0 -0
handlers/app_yolo.py +41 -0
handlers/app_yolo_test.py +20 -0
handlers/frame_handler_resnet.py +253 -0
handlers/frame_handler_yolo.py +294 -0
handlers/video_handler.py +171 -0
handlers/yolov8n.pt +3 -0
input_data/.DS_Store +0 -0
requirements.txt +191 -0
yolov8n.pt +3 -0

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+# Default ignored files
+/shelf/
+/workspace.xml

.idea/aws.xml ADDED Viewed

	@@ -0,0 +1,11 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="accountSettings">
+    <option name="activeRegion" value="us-east-1" />
+    <option name="recentlyUsedRegions">
+      <list>
+        <option value="us-east-1" />
+      </list>
+    </option>
+  </component>
+</project>

.idea/inspectionProfiles/Project_Default.xml ADDED Viewed

	@@ -0,0 +1,23 @@

+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="3">
+            <item index="0" class="java.lang.String" itemvalue="transformers" />
+            <item index="1" class="java.lang.String" itemvalue="huggingface-hub" />
+            <item index="2" class="java.lang.String" itemvalue="datasets" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <option name="ignoredErrors">
+        <list>
+          <option value="E265" />
+        </list>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,7 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.11 (viralplay)" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (viralplay)" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/viralplay.iml" filepath="$PROJECT_DIR$/.idea/viralplay.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

.idea/viralplay.iml ADDED Viewed

	@@ -0,0 +1,14 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/.venv" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+</module>

app.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import os
+import shutil
+import spaces
+import gradio as gr
+from handlers import frame_handler_yolo as fh
+from handlers import video_handler as vh
+model_path = "yolov8n.pt"  # YOLOv8 model path
+@spaces.GPU(duration=300)
+def process_video(video_file):
+    """
+    Processes the uploaded video file by extracting key frames, cropping them, and generating a processed video.
+    """
+    status_message = "Processing started..."
+    # Define output directories
+    output_folder = "output_data"
+    all_frames_folder = os.path.join(output_folder, "all_frames")
+    key_frames_folder = os.path.join(output_folder, "key_frames")
+    nonkey_frames_folder = os.path.join(output_folder, "nonkey_frames")
+    cropped_frames_folder = os.path.join(output_folder, "cropped_frames")
+    processed_video_path = os.path.join(output_folder, "processed_video.mp4")
+    print("Calling process_video function: Output folder:", output_folder)
+    # Clear output directory before processing
+    if os.path.exists(output_folder):
+        shutil.rmtree(output_folder)
+    os.makedirs(output_folder, exist_ok=True)
+    # Save uploaded video temporarily
+    video_path = os.path.join(output_folder, "input_video.mp4")
+    with open(video_file.name, "rb") as vf:
+        with open(video_path, "wb") as f:
+            f.write(vf.read())
+    status_message = "Extracting frames..."
+    yield status_message, None
+    # Step 1: Extract all frames
+    vh.extract_all_frames(video_path, all_frames_folder)
+    status_message = "Detecting key frames..."
+    yield status_message, None
+    # Step 2: Extract key frames
+    original_fps = 30
+    fh.extract_key_frames(all_frames_folder, key_frames_folder, original_fps, model_path)
+    status_message = "Cropping key frames..."
+    yield status_message, None
+    # Step 3: Crop key frames based on object detection
+    target_resolution = (360, 640)  # Output resolution (9:16)
+    fh.crop_preserve_key_objects(key_frames_folder, cropped_frames_folder, model_path, target_resolution)
+    status_message = "Generating final video..."
+    yield status_message, None
+    # Step 4: Generate short video
+    target_resolution = (360, 640)  # Output resolution (9:16)
+    target_frame_rate = 30
+    vh.create_video_from_frames(cropped_frames_folder, processed_video_path, target_frame_rate, target_resolution)
+    status_message = "Processing complete!"
+    yield status_message, processed_video_path
+# Gradio Blocks UI: one row with three columns - upload, controls/status, and the result player
+with gr.Blocks() as demo:
+    gr.Markdown("## Generate short video for your football match")
+    gr.Markdown("Upload a video file. The app will extract key frames, crop them to fix 9:16 aspect ratio, "
+                "and generate a short video.")
+    with gr.Row():
+        with gr.Column():
+            # Single video upload; type="filepath" hands process_video a path rather than file contents
+            video_input = gr.File(label="Upload Video", type="filepath", file_types=["video"], file_count="single")
+        with gr.Column():
+            process_button = gr.Button("Process Video", variant="primary")
+            # Read-only box that shows the status messages yielded by process_video
+            status_output = gr.Textbox(label="Status", interactive=False)
+        with gr.Column():
+            # Player sized to the 360x640 (9:16) output
+            video_output = gr.Video(label="Processed Video", width=360, height=640)
+    # Each (status, video) pair yielded by process_video updates these two outputs in order
+    process_button.click(process_video, inputs=video_input, outputs=[status_output, video_output])
+# Launch the app only when this file is executed as a script
+if __name__ == "__main__":
+    demo.launch()

handlers/.DS_Store ADDED Viewed

Binary file (8.2 kB). View file

handlers/__init__.py ADDED Viewed

File without changes

handlers/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (176 Bytes). View file

handlers/__pycache__/frame_handler_resnet.cpython-311.pyc ADDED Viewed

Binary file (8.39 kB). View file

handlers/__pycache__/frame_handler_yolo.cpython-311.pyc ADDED Viewed

Binary file (13.6 kB). View file

handlers/__pycache__/video_handler.cpython-311.pyc ADDED Viewed

Binary file (6.94 kB). View file

handlers/app_yolo.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import video_handler as vh
+import frame_handler_yolo as fh
+# Stand-alone pipeline script: extract frames -> pick key frames -> crop to 9:16 -> build the video.
+# NOTE(review): the relative paths below presumably assume the script is run from inside handlers/ - confirm.
+model_path = "yolov8n.pt"  # YOLOv8 model path
+original_fps = 30  # Original FPS of the input videos
+video_path = "../input_data/football.mp4"  # Replace with your video path
+output_folder = "../output_data/"  # Folder to save extracted frames
+all_frames_folder = output_folder + "all_frames"
+# ============== Step 1: Extract frames from the input video ==============
+vh.extract_all_frames(video_path, all_frames_folder)
+# Alternative kept for reference: sample frames at a fixed rate instead of extracting all of them
+#frame_rate = 2  # Extract 2 frames per second
+#vh.extract_frames_by_rate(video_path, all_frames_folder, frame_rate)
+# ============== Step 2: Extract key frames from the extracted frames ==============
+# Key frames are frames that contain a ball.
+# If the frame before a key frame is a non-key frame, a major movement is assumed:
+# up to 30 previous frames (~1 second) are reclassified as key frames to give that movement context.
+key_frames_folder = output_folder + "key_frames"  # Save key frames here
+nonkey_frames_folder = output_folder + "nonkey_frames"  # Save non-key frames here (not used below)
+fh.extract_key_frames(all_frames_folder, key_frames_folder, original_fps, model_path)
+# ============== Step 3: Crop the key frames to a 9:16 aspect ratio while keeping the key object (the football) ==============
+key_frames_9_16_folder = output_folder + "key_frames_9_16"  # Save processed frames here
+target_resolution = (360, 640)  # Output resolution (9:16)
+fh.crop_preserve_key_objects(key_frames_folder, key_frames_9_16_folder, model_path, target_resolution)
+# ============== Step 4: Create a video from the processed frames ==============
+output_video_path_9_16 = output_folder + "output_video_9_16.mp4"  # Output video path
+target_frame_rate = 30  # Frames per second of the output videos
+vh.create_video_from_frames(key_frames_9_16_folder, output_video_path_9_16, target_frame_rate, target_resolution)

handlers/app_yolo_test.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import video_handler as vh
+import frame_handler_yolo as fh
+# Manual test script: rebuilds a video from an existing folder of already cropped test frames.
+# Only the final create_video_from_frames call is active; the cropping call is commented out.
+model_path = "yolov8n.pt"  # YOLOv8 model path (only referenced by the commented-out call below)
+original_fps = 30  # Original FPS of the input videos (not used by the active code)
+video_path = "../input_data/football.mp4"  # Replace with your video path (not used by the active code)
+output_folder = "../output_data/"  # Folder to save extracted frames
+all_frames_folder = output_folder + "all_frames"
+output_video_path_9_16 = output_folder + "output_video_9_16_test.mp4"  # Output video path
+target_frame_rate = 30  # Frames per second of the output videos
+target_resolution = (360, 640)  # Output resolution (9:16)
+# FOR TESTING ONLY
+test_key_frames_folder = output_folder + "test_key_frames"
+test_key_frames_9_16_folder = output_folder + "test_key_frames_9_16"
+# Enable the next line to regenerate the cropped frames before building the video
+#fh.crop_preserve_key_objects(test_key_frames_folder, test_key_frames_9_16_folder, model_path, target_resolution)
+vh.create_video_from_frames(test_key_frames_9_16_folder, output_video_path_9_16, target_frame_rate, target_resolution)

handlers/frame_handler_resnet.py ADDED Viewed

	@@ -0,0 +1,253 @@

+import os
+import cv2
+import torch
+#from transformers import DetrImageProcessor, DetrForObjectDetection
+from transformers import AutoImageProcessor, AutoModelForObjectDetection
+from PIL import Image
+import numpy as np
+def crop_preserve_key_objects(input_folder, output_folder, model_name='facebook/detr-resnet-50', target_resolution=(360, 640)):
+    """
+    Preprocess frames to fit a target aspect ratio, focusing on the densest group of people
+    if a football is not detected, and extending the area until it reaches the target resolution.
+    Args:
+        input_folder (str): Path to the folder containing key frames.
+        output_folder (str): Path to save the processed frames.
+        model_name (str): Hugging Face model name for DETR.
+        target_resolution (tuple): Desired resolution (width, height), e.g., (1920, 1080).
+    """
+    print("Preprocessing frames using DETR to fit the target aspect ratio...")
+    # Load the DETR model and processor
+    #processor = DetrImageProcessor.from_pretrained(model_name)
+    #model = DetrForObjectDetection.from_pretrained(model_name)
+    processor = AutoImageProcessor.from_pretrained(model_name)
+    model = AutoModelForObjectDetection.from_pretrained(model_name)
+    target_aspect_ratio = target_resolution[0] / target_resolution[1]
+    for frame_name in os.listdir(input_folder):
+        frame_path = os.path.join(input_folder, frame_name)
+        if not frame_name.lower().endswith(('.jpg', '.png')):
+            continue  # Skip non-image files
+        # Read the frame
+        frame = cv2.imread(frame_path)
+        if frame is None:
+            print(f"Error reading frame: {frame_path}")
+            continue
+        original_height, original_width = frame.shape[:2]
+        frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        # Run inference
+        inputs = processor(images=frame_pil, return_tensors="pt")
+        outputs = model(**inputs)
+        # Extract bounding boxes and labels
+        logits = outputs.logits.softmax(-1)[0]
+        boxes = outputs.pred_boxes[0].cpu().detach().numpy()
+        labels = logits.argmax(-1).cpu().detach().numpy()
+        scores = logits.max(-1).values.cpu().detach().numpy()
+        # Filter boxes with a confidence threshold
+        confidence_threshold = 0.1
+        filtered_boxes = []
+        for i, score in enumerate(scores):
+            if score >= confidence_threshold:
+                filtered_boxes.append((labels[i], score, boxes[i]))
+        # Separate detections into categories
+        ball_detected = False
+        people_boxes = []
+        for label, score, box in filtered_boxes:
+            # Convert box from normalized coordinates to pixel values
+            x_min, y_min, x_max, y_max = (
+                int(box[0] * original_width),
+                int(box[1] * original_height),
+                int(box[2] * original_width),
+                int(box[3] * original_height),
+            )
+            if label == 32:  # "sports ball" class in COCO
+                print("Ball is detected in the frame.")
+                x_center = (x_min + x_max) // 2
+                y_center = (y_min + y_max) // 2
+                ball_detected = True
+                break
+            elif label == 1:  # "person" class in COCO
+                print("Person is detected in the frame.")
+                people_boxes.append((x_min, y_min, x_max, y_max))
+        # If no ball is detected, focus on the densest group of people
+        if not ball_detected and people_boxes:
+            # Cluster the people into groups based on proximity
+            centers = np.array([(int((x1 + x2) / 2), int((y1 + y2) / 2)) for x1, y1, x2, y2 in people_boxes])
+            distances = np.linalg.norm(centers[:, None, :] - centers[None, :, :], axis=2)
+            # Define a distance threshold to group nearby people
+            threshold = max(original_width, original_height) * 0.1  # Adjust clustering sensitivity
+            clusters = []
+            visited = set()
+            for i, center in enumerate(centers):
+                if i in visited:
+                    continue
+                cluster = [i]
+                visited.add(i)
+                for j in range(len(centers)):
+                    if j not in visited and distances[i, j] < threshold:
+                        cluster.append(j)
+                        visited.add(j)
+                clusters.append(cluster)
+            # Find the largest cluster and calculate its bounding box
+            largest_cluster = max(clusters, key=len)
+            x_min = min(people_boxes[i][0] for i in largest_cluster)
+            y_min = min(people_boxes[i][1] for i in largest_cluster)
+            x_max = max(people_boxes[i][2] for i in largest_cluster)
+            y_max = max(people_boxes[i][3] for i in largest_cluster)
+            # Center the crop on the largest cluster
+            x_center = (x_min + x_max) // 2
+            y_center = (y_min + y_max) // 2
+        # Calculate the cropping region to fit the target resolution
+        new_width = int(original_height * target_aspect_ratio)
+        new_height = int(original_width / target_aspect_ratio)
+        x_start = max(0, x_center - new_width // 2)
+        y_start = max(0, y_center - new_height // 2)
+        x_end = min(original_width, x_start + new_width)
+        y_end = min(original_height, y_start + new_height)
+        # Adjust the crop if the size is smaller than the target resolution
+        if (x_end - x_start) < new_width:
+            x_start = max(0, x_end - new_width)
+        if (y_end - y_start) < new_height:
+            y_start = max(0, y_end - new_height)
+        # Crop and resize the frame
+        frame_cropped = frame[int(y_start):int(y_end), int(x_start):int(x_end)]
+        frame_resized = cv2.resize(frame_cropped, target_resolution, interpolation=cv2.INTER_LINEAR)
+        # Save the processed frame
+        output_path = os.path.join(output_folder, frame_name)
+        cv2.imwrite(output_path, frame_resized)
+        print(f"Processed frame saved: {output_path}")
+    print("Preprocessing completed.")
+#back up
+def backup_yolo_crop_preserve_key_objects(input_folder, output_folder, model_path='yolov8n.pt', target_resolution=(360, 640)):
+    """
+    Preprocess frames to fit a target aspect ratio, focusing on the densest group of people
+    if a football is not detected, and extending the area until it reaches the target resolution.
+    Args:
+        input_folder (str): Path to the folder containing key frames.
+        output_folder (str): Path to save the processed frames.
+        model_path (str): Path to the YOLOv8 model file.
+        target_resolution (tuple): Desired resolution (width, height), e.g., (1920, 1080).
+    """
+    print("Preprocessing frames to fit the target aspect ratio...")
+    model = YOLO(model_path)
+    target_aspect_ratio = target_resolution[0] / target_resolution[1]
+    for frame_name in os.listdir(input_folder):
+        frame_path = os.path.join(input_folder, frame_name)
+        if not frame_name.lower().endswith(('.jpg', '.png')):
+            continue  # Skip non-image files
+        # Read the frame
+        frame = cv2.imread(frame_path)
+        if frame is None:
+            print(f"Error reading frame: {frame_path}")
+            continue
+        original_height, original_width = frame.shape[:2]
+        # Run YOLOv8 inference
+        # TTP adjusted conf to 0.3 from 0.5 originally
+        results = model.predict(frame, conf=0.3)
+        # Initialize cropping region
+        x_center, y_center = original_width // 2, original_height // 2
+        ball_detected = False
+        people_boxes = []
+        # Process detections to find "sports ball" or "person"
+        for result in results[0].boxes:
+            label = result.cls
+            if model.names[int(label)] == "sports ball":
+                # Get the center of the detected football
+                x_min, y_min, x_max, y_max = result.xyxy[0].numpy()
+                x_center = int((x_min + x_max) / 2)
+                y_center = int((y_min + y_max) / 2)
+                ball_detected = True
+                break
+            elif model.names[int(label)] == "person":
+                # Collect bounding boxes for people
+                x_min, y_min, x_max, y_max = result.xyxy[0].numpy()
+                people_boxes.append((x_min, y_min, x_max, y_max))
+        # If no ball is detected, focus on the densest group of people
+        if not ball_detected and people_boxes:
+            # Cluster the people into groups based on proximity
+            centers = np.array([(int((x1 + x2) / 2), int((y1 + y2) / 2)) for x1, y1, x2, y2 in people_boxes])
+            distances = np.linalg.norm(centers[:, None, :] - centers[None, :, :], axis=2)
+            # Define a distance threshold to group nearby people - Adjust clustering sensitivity
+            threshold = max(original_width, original_height) * 0.2  # TTP adjusted to 0.2
+            clusters = []
+            visited = set()
+            for i, center in enumerate(centers):
+                if i in visited:
+                    continue
+                cluster = [i]
+                visited.add(i)
+                for j in range(len(centers)):
+                    if j not in visited and distances[i, j] < threshold:
+                        cluster.append(j)
+                        visited.add(j)
+                clusters.append(cluster)
+            # Find the largest cluster and calculate its bounding box
+            largest_cluster = max(clusters, key=len)
+            x_min = min(people_boxes[i][0] for i in largest_cluster)
+            y_min = min(people_boxes[i][1] for i in largest_cluster)
+            x_max = max(people_boxes[i][2] for i in largest_cluster)
+            y_max = max(people_boxes[i][3] for i in largest_cluster)
+            # Center the crop on the largest cluster
+            x_center = int((x_min + x_max) / 2)
+            y_center = int((y_min + y_max) / 2)
+        # Calculate the cropping region to fit the target resolution
+        new_width = int(original_height * target_aspect_ratio)
+        new_height = int(original_width / target_aspect_ratio)
+        x_start = max(0, x_center - new_width // 2)
+        y_start = max(0, y_center - new_height // 2)
+        x_end = min(original_width, x_start + new_width)
+        y_end = min(original_height, y_start + new_height)
+        # Adjust the crop if the size is smaller than the target resolution
+        if (x_end - x_start) < new_width:
+            x_start = max(0, x_end - new_width)
+        if (y_end - y_start) < new_height:
+            y_start = max(0, y_end - new_height)
+        # Crop and resize the frame
+        frame_cropped = frame[int(y_start):int(y_end), int(x_start):int(x_end)]
+        frame_resized = cv2.resize(frame_cropped, target_resolution, interpolation=cv2.INTER_LINEAR)
+        # Save the processed frame
+        output_path = os.path.join(output_folder, frame_name)
+        cv2.imwrite(output_path, frame_resized)
+        print(f"Processed frame saved: {output_path}")
+    print("Preprocessing completed.")

handlers/frame_handler_yolo.py ADDED Viewed

	@@ -0,0 +1,294 @@

+import os
+import cv2
+import shutil
+from collections import deque
+from ultralytics import YOLO  # Assuming YOLOv8 library
+import numpy as np
+import functools
+import time
+def timer_decorator(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        execution_time = end_time - start_time
+        print(f"{func.__name__} took {execution_time:.2f} seconds to execute")
+        return result
+    return wrapper
+@timer_decorator
+def extract_key_frames(input_folder, key_frames_folder, original_fps=30, model_path='yolov8n.pt'):
+    """
+    Detects frames containing a football and separates them into key frames.
+    Reduces file I/O by loading frames into memory before processing.
+    Optimizations:
+        - Reads all frames into memory once to avoid multiple disk reads.
+        - Uses OpenCV to write frames instead of shutil.copy (faster).
+    Args:
+        input_folder (str): Path to the folder containing input frames.
+        key_frames_folder (str): Path to save frames containing a football.
+        original_fps: original frames per second (default is 30)
+        model_path (str): Path to the YOLOv8 model file (default is yolov8n.pt).
+    """
+    counter = 0
+    print("Extracting key frames with reduced file I/O...")
+    # Ensure the output directory exists
+    os.makedirs(key_frames_folder, exist_ok=True)
+    # Load YOLO model once
+    model = YOLO(model_path)
+    # Maintain last 30 non-key frames for reclassification
+    previous_nonkey_frames = deque(maxlen=original_fps)
+    processed_key_frames = set()
+    last_frame_was_key = False
+    # Load frames into memory first (Reduces file I/O), sort frames by file names
+    frame_names = sorted(os.listdir(input_folder))
+    frames = {}
+    for frame_name in frame_names:
+        if frame_name.lower().endswith(('.jpg', '.png')):
+            frame_path = os.path.join(input_folder, frame_name)
+            frames[frame_name] = cv2.imread(frame_path)  # Load into RAM
+    for frame_name, frame in frames.items():
+        if frame is None:
+            continue  # Skip invalid frames
+        counter += 1
+        if counter % 1000 == 0:
+            print(f"Processed {counter} frames.")
+        # Run YOLO inference
+        results = model.predict(frame, conf=0.3, verbose=False)
+        # Check if a football (sports ball) is detected
+        ball_detected = any(model.names[int(box.cls)] == "sports ball" for box in results[0].boxes)
+        if ball_detected:
+            # Reclassify up to 30 previous non-key frames
+            if not last_frame_was_key:
+                for _ in range(min(len(previous_nonkey_frames), 30)):
+                    nonkey_frame_name, nonkey_frame = previous_nonkey_frames.popleft()
+                    if nonkey_frame_name not in processed_key_frames:
+                        cv2.imwrite(os.path.join(key_frames_folder, nonkey_frame_name), nonkey_frame)
+                        processed_key_frames.add(nonkey_frame_name)
+                previous_nonkey_frames.clear()  # Reset after reclassification
+            # Save the current frame as a key frame if not already processed
+            if frame_name not in processed_key_frames:
+                cv2.imwrite(os.path.join(key_frames_folder, frame_name), frame)
+                processed_key_frames.add(frame_name)
+            last_frame_was_key = True
+        else:
+            previous_nonkey_frames.append((frame_name, frame))
+            last_frame_was_key = False
+    print("Key frame extraction complete (Optimized for File I/O).")
+@timer_decorator
+def crop_preserve_key_objects(input_folder, output_folder, model_path='yolov8n.pt', target_resolution=(360, 640)):
+    """
+    Preprocess frames to fit a target aspect ratio, focusing on key objects like a football or the densest group of people.
+    Reuse the cropping area if conditions are met for smoother transitions, and recalculate if objects are out of the reused cropping area.
+    Args:
+        input_folder (str): Path to the folder containing key frames.
+        output_folder (str): Path to save the processed frames.
+        model_path (str): Path to the YOLOv8 model file.
+        target_resolution (tuple): Desired resolution (width, height), e.g., (1920, 1080).
+    """
+    print("Preprocessing frames to fit the target aspect ratio...")
+    ball_counter = 0
+    counter = 0
+    x_min, y_min, x_max, y_max = 0, 0, 0, 0
+    model = YOLO(model_path)
+    target_aspect_ratio = target_resolution[0] / target_resolution[1]
+    os.makedirs(output_folder, exist_ok=True)
+    # Sort frames by file name
+    frame_files = sorted(
+        [f for f in os.listdir(input_folder) if f.lower().endswith(('.jpg', '.png'))]
+    )
+    last_cropping_area = None  # Store the last cropping area
+    last_objects_detected = None  # Track the type of object detected in the last frame ("ball" or "people")
+    for frame_name in frame_files:
+        frame_path = os.path.join(input_folder, frame_name)
+        if not frame_name.lower().endswith(('.jpg', '.png')):
+            continue  # Skip non-image files
+        frame = cv2.imread(frame_path)
+        if frame is None:
+            print(f"Error reading frame: {frame_path}")
+            continue
+        counter += 1
+        if counter % 100 == 0:
+            status_message = f"Preprocessing frames to fit the target aspect ratio...Processed {counter} frames."
+            #yield status_message, None
+        original_height, original_width = frame.shape[:2]
+        # YOLO inference
+        results = model.predict(frame, conf=0.3, verbose=False)
+        # Initialize variables
+        x_center, y_center = original_width // 2, original_height // 2
+        ball_detected = False
+        people_boxes = []
+        ball_box = None  # To store ball coordinates for case 1
+        # Process detections
+        for result in results[0].boxes:
+            label = result.cls
+            x_min, y_min, x_max, y_max = result.xyxy[0].numpy()
+            if model.names[int(label)] == "sports ball":
+                ball_detected = True
+                ball_box = (x_min, y_min, x_max, y_max)
+                ball_counter += 1
+            elif model.names[int(label)] == "person":
+                people_boxes.append((x_min, y_min, x_max, y_max))
+                #draw red boxes around peoples
+                #cv2.rectangle(frame, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 0, 255), 2)
+        # Determine whether to reuse the last cropping area
+        reuse_last_area = False
+        if last_cropping_area:
+            if ball_detected and last_objects_detected == "ball":
+                # Check if the ball is within the last cropping area
+                x_min, y_min, x_max, y_max = last_cropping_area
+                if ball_box and (ball_box[0] >= x_min and ball_box[1] >= y_min and
+                                 ball_box[2] <= x_max and ball_box[3] <= y_max):
+                    reuse_last_area = True
+            elif people_boxes and last_objects_detected == "people":
+                #result the last_cropping_area until the ball appears
+                reuse_last_area = True
+        if reuse_last_area:
+            # Reuse the last cropping area
+            x_min, y_min, x_max, y_max = last_cropping_area
+        else:
+            # Calculate a new cropping area
+            if ball_detected:
+                # Focus on the ball
+                x_min, y_min, x_max, y_max = ball_box
+                last_objects_detected = "ball"
+            elif people_boxes:
+                # Find the densest group of people
+                x_min, y_min, x_max, y_max = calculate_largest_group_box(people_boxes, original_width, original_height)
+                #draw blue box around densest people area
+                #cv2.rectangle(frame, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (255, 0, 0), 2)
+                last_objects_detected = "people"
+            # Default to center cropping if nothing is detected
+            # Center the crop on the largest cluster
+            x_center = (x_min + x_max) // 2
+            y_center = (y_min + y_max) // 2
+            new_width = int(original_height * target_aspect_ratio)
+            new_height = int(original_width / target_aspect_ratio)
+            x_min = max(0, x_center - new_width // 2)
+            y_min = max(0, y_center - new_height // 2)
+            x_max = min(original_width, x_min + new_width)
+            y_max = min(original_height, y_min + new_height)
+            # Adjust the crop if the size is smaller than the target resolution
+            if (x_max - x_min) < new_width:
+                x_min = max(0, x_max - new_width)
+            if (y_max - y_min) < new_height:
+                y_min = max(0, y_max - new_height)
+            #Update the last cropping area
+            last_cropping_area = (x_min, y_min, x_max, y_max)
+        # Crop and resize the frame
+        frame_cropped = frame[int(y_min):int(y_max), int(x_min):int(x_max)]
+        frame_resized = cv2.resize(frame_cropped, target_resolution, interpolation=cv2.INTER_CUBIC)
+        # Save the processed frame
+        output_path = os.path.join(output_folder, frame_name)
+        cv2.imwrite(output_path, frame_resized)
+        #save the debug frame
+        #debug_path = os.path.join(output_folder, "debug_" + frame_name)
+        #cv2.imwrite(debug_path, frame)
+    print("Completed preprocessing frames to fit the target aspect ratio.")
+    print(f"Total frames processed: {len(frame_files)}")
+    print(f"Total frames detected with a sport ball: {ball_counter}")
def calculate_largest_group_box(people_boxes, original_width, original_height, distance_ratio=0.2):
    """
    Calculate the bounding box for the largest group of nearby people.
    Two people belong to the same group when their box centers are closer than
    ``max(original_width, original_height) * distance_ratio``, either directly
    or through a chain of other people (connected components).
    Args:
        people_boxes (list of tuples): List of bounding boxes for detected people.
                                       Each box is (x_min, y_min, x_max, y_max).
        original_width (int): Width of the original frame.
        original_height (int): Height of the original frame.
        distance_ratio (float): Fraction of the longer frame side used as the
                                maximum center-to-center distance for linking
                                two people. Defaults to 0.2.
    Returns:
        tuple: Bounding box (x_min, y_min, x_max, y_max) enclosing every box of the
               group with the most people (first one found on ties), or None when
               people_boxes is empty.
    """
    if not people_boxes:
        return None  # Nothing to group
    # Center point of every bounding box (truncated to whole pixels)
    centers = np.array([(int((x1 + x2) / 2), int((y1 + y2) / 2)) for x1, y1, x2, y2 in people_boxes])
    # Pairwise Euclidean distances between all centers
    distances = np.linalg.norm(centers[:, None, :] - centers[None, :, :], axis=2)
    threshold = max(original_width, original_height) * distance_ratio
    total = len(centers)
    clusters = []
    visited = set()
    for seed in range(total):
        if seed in visited:
            continue
        visited.add(seed)
        cluster = [seed]
        pending = [seed]
        # Flood fill: keep expanding from every newly reached member so the
        # resulting group does not depend on the order of the detections.
        while pending:
            current = pending.pop()
            for other in range(total):
                if other not in visited and distances[current, other] < threshold:
                    visited.add(other)
                    cluster.append(other)
                    pending.append(other)
        clusters.append(cluster)
    # The group with the most people wins
    largest_cluster = max(clusters, key=len)
    # Union of the member boxes
    x_min = min(people_boxes[i][0] for i in largest_cluster)
    y_min = min(people_boxes[i][1] for i in largest_cluster)
    x_max = max(people_boxes[i][2] for i in largest_cluster)
    y_max = max(people_boxes[i][3] for i in largest_cluster)
    return x_min, y_min, x_max, y_max

handlers/video_handler.py ADDED Viewed

	@@ -0,0 +1,171 @@

+import os
+import cv2
+import functools
+import time
def timer_decorator(func):
    """
    Decorator that prints how long each call to ``func`` takes.
    Args:
        func (callable): Function to wrap.
    Returns:
        callable: Wrapper that forwards all arguments to ``func``, prints the
                  elapsed time in seconds after a successful call, and returns
                  the result of ``func`` unchanged.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measurement is not affected by
        # system clock adjustments the way time.time() is.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        print(f"{func.__name__} took {execution_time:.2f} seconds to execute")
        return result
    return wrapper
@timer_decorator
def extract_frames_by_rate(video_path, output_folder, frame_rate=1):
    """
    Extracts frames from a video at a specified frame rate.
    Frames are written to ``output_folder`` as ``frame_00000.jpg``,
    ``frame_00001.jpg``, ... in the order they are saved.
    Args:
        video_path (str): Path to the input video file.
        output_folder (str): Directory to save the extracted frames (created if missing).
        frame_rate (int): Number of frames to extract per second of the video.
                          When it exceeds the video's FPS every frame is saved.
    Returns:
        None. If the video cannot be opened an error is printed and nothing is extracted.
    """
    # exist_ok avoids the race between checking for the folder and creating it
    os.makedirs(output_folder, exist_ok=True)
    video = cv2.VideoCapture(video_path)
    saved_count = 0
    try:
        if not video.isOpened():
            print(f"Error: Cannot open video file {video_path}")
            return
        # Get video properties
        fps = int(video.get(cv2.CAP_PROP_FPS))  # Frames per second
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))  # Total number of frames
        # Some containers report an FPS of 0; avoid dividing by it
        duration = total_frames / fps if fps > 0 else 0.0
        print(f"Video loaded: {video_path}")
        print(f"Total Frames: {total_frames}, FPS: {fps}, Duration: {duration:.2f} seconds")
        # Save one frame out of every `frame_interval` frames
        frame_interval = fps // frame_rate
        if frame_interval == 0:
            # frame_rate above the video FPS (or unknown FPS): keep every frame
            # instead of failing on a modulo by zero below
            frame_interval = 1
        frame_count = 0
        while True:
            ret, frame = video.read()
            # Stop when the video ends
            if not ret:
                break
            if frame_count % frame_interval == 0:
                frame_filename = os.path.join(output_folder, f"frame_{saved_count:05d}.jpg")
                cv2.imwrite(frame_filename, frame)
                saved_count += 1
            frame_count += 1
    finally:
        # Always free the capture, even if reading or writing raised
        video.release()
    print(f"Extraction complete. Total frames saved: {saved_count}")
@timer_decorator
def extract_all_frames(video_path, output_folder):
    """
    Extracts all frames from a video.
    Frames are written to ``output_folder`` as ``frame_00000.jpg``,
    ``frame_00001.jpg``, ... in reading order.
    Args:
        video_path (str): Path to the input video file.
        output_folder (str): Directory to save the extracted frames (created if missing).
    Returns:
        None. If the video cannot be opened an error is printed and nothing is extracted.
    """
    # exist_ok avoids the race between checking for the folder and creating it
    os.makedirs(output_folder, exist_ok=True)
    video = cv2.VideoCapture(video_path)
    frame_count = 0
    try:
        if not video.isOpened():
            print(f"Error: Cannot open video file {video_path}")
            return
        # Get video properties
        fps = int(video.get(cv2.CAP_PROP_FPS))  # Frames per second
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))  # Total number of frames
        # Some containers report an FPS of 0; avoid dividing by it
        duration = total_frames / fps if fps > 0 else 0.0
        print(f"Video loaded: {video_path}")
        print(f"Total Frames: {total_frames}, FPS: {fps}, Duration: {duration:.2f} seconds")
        print("Extracting frames from the video...")
        while True:
            ret, frame = video.read()
            # Stop when the video ends
            if not ret:
                break
            # Save every frame
            frame_filename = os.path.join(output_folder, f"frame_{frame_count:05d}.jpg")
            cv2.imwrite(frame_filename, frame)
            frame_count += 1
    finally:
        # Always free the capture, even if reading or writing raised
        video.release()
    print(f"Extraction complete. Total frames saved: {frame_count}")
@timer_decorator
def create_video_from_frames(input_folder, output_video_path, frame_rate=30, resolution=(360, 640)):
    """
    Creates a video from preprocessed frames.
    Frames are the ``.jpg``/``.png`` files of ``input_folder`` taken in sorted
    file-name order; unreadable files are reported and skipped.
    Args:
        input_folder (str): Path to the folder containing frames.
        output_video_path (str): Path to save the output video.
        frame_rate (int): Frames per second for the output video.
        resolution (tuple): Resolution of the output video (width, height).
    Returns:
        None. If the video writer cannot be opened an error is printed and no
        video is produced.
    """
    # Get sorted list of image files in the folder
    frame_files = sorted(
        f for f in os.listdir(input_folder) if f.lower().endswith(('.jpg', '.png'))
    )
    # Initialize the video writer
    fourcc = cv2.VideoWriter_fourcc(*'avc1')  # H.264 Codec
    video_writer = cv2.VideoWriter(output_video_path, fourcc, frame_rate, resolution)
    try:
        # A missing codec or an unwritable path leaves the writer closed; without
        # this check every write() would be silently dropped.
        if not video_writer.isOpened():
            print(f"Error: Cannot open video writer for {output_video_path}")
            return
        for frame_file in frame_files:
            frame_path = os.path.join(input_folder, frame_file)
            frame = cv2.imread(frame_path)
            if frame is None:
                print(f"Error reading frame: {frame_path}")
                continue
            # Get the frame's original resolution
            original_height, original_width = frame.shape[:2]
            # Resize only when the frame does not already match the target resolution
            if (original_width, original_height) != resolution:
                frame = cv2.resize(frame, resolution, interpolation=cv2.INTER_CUBIC)
            # Write the frame to the video
            video_writer.write(frame)
    finally:
        # Always finalize/free the writer, even if a frame failed to process
        video_writer.release()
    print(f"Video saved to: {output_video_path}")

handlers/yolov8n.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f59b3d833e2ff32e194b5bb8e08d211dc7c5bdf144b90d2c8412c47ccfc83b36
+size 6549796

input_data/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,191 @@

+absl-py==2.1.0
+aiofiles==23.2.1
+aiohttp==3.8.4
+aiosignal==1.3.1
+altair==5.3.0
+annotated-types==0.6.0
+anyio==4.3.0
+appnope==0.1.4
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asttokens==2.4.1
+async-lru==2.0.4
+async-timeout==4.0.2
+attrs==23.1.0
+babel==2.16.0
+beautifulsoup4==4.12.3
+bleach==6.1.0
+boto3==1.28.73
+botocore==1.31.73
+captum==0.7.0
+certifi==2023.5.7
+cffi==1.17.1
+charset-normalizer==3.1.0
+clarabel==0.9.0
+click==8.1.7
+comm==0.2.2
+contourpy==1.2.1
+cvxpy==1.6.0
+cycler==0.12.1
+debugpy==1.8.5
+decorator==5.1.1
+defusedxml==0.7.1
+dill==0.3.7
+dnspython==2.6.1
+email_validator==2.1.1
+executing==2.1.0
+fastapi==0.111.0
+fastapi-cli==0.0.3
+fastjsonschema==2.20.0
+ffmpy==0.3.2
+filelock==3.12.4
+fonttools==4.51.0
+fqdn==1.5.1
+frozenlist==1.3.3
+fsspec==2023.6.0
+gradio_client==0.16.2
+grpcio==1.66.2
+h11==0.14.0
+holidays==0.57
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.23.0
+idna==3.4
+importlib_resources==6.4.0
+ipykernel==6.29.5
+ipython==8.27.0
+isoduration==20.11.0
+jedi==0.19.1
+Jinja2==3.1.2
+jmespath==1.0.1
+joblib==1.4.2
+json5==0.9.25
+jsonpointer==3.0.0
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+jupyter-events==0.10.0
+jupyter-lsp==2.2.5
+jupyter_client==8.6.2
+jupyter_core==5.7.2
+jupyter_server==2.14.2
+jupyter_server_terminals==0.5.3
+jupyterlab==4.2.5
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.27.3
+kaleido==0.2.1
+kiwisolver==1.4.5
+liac-arff==2.5.0
+lightning-utilities==0.11.7
+Markdown==3.7
+markdown-it-py==3.0.0
+MarkupSafe==2.1.3
+matplotlib==3.8.4
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+minio==7.2.8
+mistune==3.0.2
+mpmath==1.3.0
+multidict==6.0.4
+multiprocess==0.70.15
+nbclient==0.10.0
+nbconvert==7.16.4
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.1
+neuralprophet==0.9.0
+notebook_shim==0.2.4
+numpy==1.26.1
+openai==0.27.7
+openml==0.14.2
+orjson==3.10.3
+osqp==0.6.7.post3
+overrides==7.7.0
+packaging==23.2
+pandas==2.1.1
+pandocfilters==1.5.1
+parso==0.8.4
+pexpect==4.9.0
+pillow==10.3.0
+platformdirs==4.3.2
+plotly==5.24.1
+prometheus_client==0.20.0
+prompt_toolkit==3.0.47
+protobuf==5.28.2
+psutil==6.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==13.0.0
+pycparser==2.22
+pycryptodome==3.20.0
+pydantic==2.7.1
+pydantic_core==2.18.2
+pydub==0.25.1
+Pygments==2.18.0
+pyparsing==3.1.2
+python-dateutil==2.8.2
+python-dotenv==1.0.1
+python-json-logger==2.0.7
+python-multipart==0.0.9
+pytorch-lightning==2.4.0
+pytz==2023.3.post1
+PyYAML==6.0.1
+pyzmq==26.2.0
+qdldl==0.1.7.post4
+referencing==0.35.1
+regex==2023.10.3
+requests==2.31.0
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.7.1
+rpds-py==0.18.1
+ruff==0.4.3
+s3transfer==0.7.0
+safetensors==0.4.0
+scikit-learn==1.5.2
+scipy==1.14.1
+scs==3.2.7
+semantic-version==2.10.0
+Send2Trash==1.8.3
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+soupsieve==2.6
+stack-data==0.6.3
+starlette==0.37.2
+sympy==1.12
+tenacity==9.0.0
+tensorboard==2.18.0
+tensorboard-data-server==0.7.2
+terminado==0.18.1
+threadpoolctl==3.5.0
+tinycss2==1.3.0
+tokenizers==0.14.1
+tomlkit==0.12.0
+toolz==0.12.1
+torch==2.1.0
+torchdata==0.7.0
+torchmetrics==1.4.2
+tornado==6.4.1
+tqdm==4.65.0
+traitlets==5.14.3
+typer==0.12.3
+types-python-dateutil==2.9.0.20240906
+typing_extensions==4.8.0
+tzdata==2023.3
+ujson==5.9.0
+uri-template==1.3.0
+urllib3==2.0.2
+uvicorn==0.29.0
+uvloop==0.19.0
+watchfiles==0.21.0
+wcwidth==0.2.13
+webcolors==24.8.0
+webencodings==0.5.1
+websocket-client==1.8.0
+websockets==11.0.3
+Werkzeug==3.0.4
+xmltodict==0.13.0
+xxhash==3.4.1
+yarl==1.9.2

yolov8n.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f59b3d833e2ff32e194b5bb8e08d211dc7c5bdf144b90d2c8412c47ccfc83b36
+size 6549796