Spaces:

vertalius
/

Pose-Detection-App

Sleeping

App Files Files Community

vertalius committed on Feb 22, 2025

Commit

f9f78c7

verified ·

1 Parent(s): c25c4dd

Update utils.py

Browse files

Files changed (1) hide show

utils.py +48 -39

utils.py CHANGED Viewed

@@ -2,6 +2,7 @@ import cv2
 import tempfile
 import numpy as np
 import os
 from animation_renderer import AnimationRenderer
 def process_image(image, pose_detector, skeleton_generator):
@@ -22,16 +23,17 @@ def process_image(image, pose_detector, skeleton_generator):
 def process_video(video_path, pose_detector, skeleton_generator):
     """
-    Process video for pose detection and skeleton generation with improved error handling and chunked processing
     """
     cap = None
     out = None
     try:
         # Optimize video processing
-        chunk_size = 5  # Process fewer frames at a time
-        buffer_size = 512 * 1024  # Smaller buffer for stability
-        cv2.setNumThreads(2)  # Allow 2 threads for better performance
-        cv2.ocl.setUseOpenCL(False)  # Disable OpenCL to prevent crashes
         cap = cv2.VideoCapture(video_path)
         if not cap.isOpened():
@@ -50,7 +52,7 @@ def process_video(video_path, pose_detector, skeleton_generator):
             frame_width = target_width
             frame_height = int(frame_height * scale)
-        # Create temporary file with better error handling
         try:
             temp_output = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
             output_path = temp_output.name
@@ -68,24 +70,22 @@ def process_video(video_path, pose_detector, skeleton_generator):
         frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
         fps = int(cap.get(cv2.CAP_PROP_FPS))
-        if fps == 0:  # Handle GIF files which might report 0 fps
             fps = 30
-        # Initialize animation renderer
         renderer = AnimationRenderer(fps=fps)
         # Create temporary file for processed video
         temp_output = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
         output_path = temp_output.name
-        # Initialize video writer with more efficient settings
-        max_dimension = 480  # Reduced max dimension
         if frame_width > max_dimension or frame_height > max_dimension:
             scale = min(max_dimension / frame_width, max_dimension / frame_height)
             frame_width = int(frame_width * scale)
             frame_height = int(frame_height * scale)
-        # Use MP4V codec which is more memory efficient
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
         out = cv2.VideoWriter(output_path, fourcc, min(fps, 30), (frame_width, frame_height))
@@ -103,21 +103,21 @@ def process_video(video_path, pose_detector, skeleton_generator):
                 frame = cv2.resize(frame, (frame_width, frame_height))
             try:
-                # Resize frame for better detection
                 fh, fw = frame.shape[:2]
                 if fw > 640:
                     scale = 640 / fw
                     frame = cv2.resize(frame, (640, int(fh * scale)))
-                # Process frame with error handling and retry
                 retries = 3
                 while retries > 0:
                     landmarks, annotated_frame = pose_detector.detect_video_frame(frame)
                     if landmarks is not None:
                         break
                     retries -= 1
-                # Ensure frame is properly encoded before writing
                 if annotated_frame is not None:
                     out.write(annotated_frame)
                 else:
@@ -130,10 +130,13 @@ def process_video(video_path, pose_detector, skeleton_generator):
                         renderer.add_keyframe(landmarks, pose_detector.pose_connections, frame_time)
                     except Exception as e:
                         print(f"Frame {frame_count} skeleton generation error: {str(e)}")
                         if animation_frames:
                             animation_frames.append(animation_frames[-1])
-                elif animation_frames:
-                    animation_frames.append(animation_frames[-1])
             except Exception as e:
                 print(f"Frame {frame_count} processing error: {str(e)}")
@@ -142,19 +145,18 @@ def process_video(video_path, pose_detector, skeleton_generator):
             frame_count += 1
             frame_time = frame_count / fps
-            if frame_count > 1000:  # Safety limit for very long videos
                 break
-        # Release resources
         if cap is not None:
             cap.release()
         if out is not None:
             out.release()
-        # Convert output video to proper format using more compatible settings
         converted_output = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
         os.system(f'ffmpeg -y -i {output_path} -vcodec libx264 -preset ultrafast -pix_fmt yuv420p {converted_output.name}')
-        os.unlink(output_path)  # Remove the original output
         return converted_output.name, animation_frames
@@ -169,39 +171,38 @@ def process_video(video_path, pose_detector, skeleton_generator):
 def process_gif(gif_path, pose_detector, skeleton_generator):
     """
     Process GIF for pose detection and skeleton generation.
-    This implementation uses Pillow to extract frames from the GIF, processes each frame,
     and creates a temporary MP4 video with the processed frames.
     """
     try:
         from PIL import Image, ImageSequence
-        # Открываем GIF с помощью Pillow
         gif = Image.open(gif_path)
         frames = []
         for frame in ImageSequence.Iterator(gif):
             frame = frame.convert("RGB")
             frame_np = np.array(frame)
-            # Переводим RGB в BGR (OpenCV использует BGR)
             frame_cv = cv2.cvtColor(frame_np, cv2.COLOR_RGB2BGR)
             frames.append(frame_cv)
         processed_frames = []
         animation_frames = []
-        # Обрабатываем каждый кадр
         for frame in frames:
             landmarks, annotated_frame = pose_detector.detect_video_frame(frame)
             if annotated_frame is None:
                 annotated_frame = frame
             processed_frames.append(annotated_frame)
             if landmarks is not None:
                 skeleton_data = skeleton_generator.generate_skeleton(landmarks)
             else:
-                # Если не удалось получить данные, используем предыдущий, если он есть
                 skeleton_data = animation_frames[-1] if animation_frames else {}
             animation_frames.append(skeleton_data)
-        # Сохраняем обработанные кадры в временное видео (MP4)
         height, width = processed_frames[0].shape[:2]
-        fps = 10  # Подобранное значение для GIF
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
         temp_video = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
         out = cv2.VideoWriter(temp_video.name, fourcc, fps, (width, height))
@@ -217,37 +218,42 @@ def process_gif(gif_path, pose_detector, skeleton_generator):
 def process_video_upload(uploaded_file, components, processed_file, db, is_gif, col1, col2):
     """
     Handle video/GIF file upload processing.
-    Для GIF-файлов оригинальное воспроизведение заменяется на обработанное видео,
-    чтобы обеспечить корректное отображение анимации со скелетом на каждом кадре.
     """
     pose_detector, skeleton_generator, animation_exporter = components
     # Считываем байты файла
     file_bytes = uploaded_file.read()
-    # Сохраняем в временный файл
-    temp_input = tempfile.NamedTemporaryFile(suffix=('.gif' if is_gif else '.mp4'), delete=False)
     temp_input.write(file_bytes)
     temp_input.seek(0)
     video_path = temp_input.name
     if is_gif:
         processed_video_path, animation_frames = process_gif(video_path, pose_detector, skeleton_generator)
-        with col1:
-            if processed_video_path:
-                with open(processed_video_path, "rb") as f:
-                    st.video(f.read())
-            else:
-                st.error("Error processing GIF.")
     else:
-        with col1:
-            st.video(file_bytes)
         processed_video_path, animation_frames = process_video(video_path, pose_detector, skeleton_generator)
     if not animation_frames:
         raise ValueError("No poses detected in the video/gif")
     save_video_data(db, processed_file.id, animation_frames)
     animation_data_binary = animation_exporter.export_animation(animation_frames)
     with col2:
         if processed_video_path:
             with open(processed_video_path, "rb") as f:
@@ -270,6 +276,9 @@ def save_animation_data(db, file_id: int, skeleton_data: dict):
 def save_video_data(db, file_id: int, animation_frames: list):
     """
     Store one PoseData record per animation frame, then commit.

     Each entry of animation_frames is saved with its list index as
     frame_number and the given file_id; the entry itself is passed as
     landmarks. Nothing is returned.
     """
     # Imported inside the function rather than at module level
     # (presumably to avoid a circular import -- TODO confirm).
     from database import PoseData
     for frame_num, frame_data in enumerate(animation_frames):
         pose_data = PoseData(file_id=file_id, frame_number=frame_num, landmarks=frame_data)
         db.add(pose_data)
     # Single commit after all frames have been added.
     db.commit()

 import tempfile
 import numpy as np
 import os
+import streamlit as st
 from animation_renderer import AnimationRenderer
 def process_image(image, pose_detector, skeleton_generator):
 def process_video(video_path, pose_detector, skeleton_generator):
     """
+    Process video for pose detection and skeleton generation
+    with improved error handling and chunked processing
     """
     cap = None
     out = None
     try:
         # Optimize video processing
+        chunk_size = 5
+        buffer_size = 512 * 1024
+        cv2.setNumThreads(2)
+        cv2.ocl.setUseOpenCL(False)
         cap = cv2.VideoCapture(video_path)
         if not cap.isOpened():
             frame_width = target_width
             frame_height = int(frame_height * scale)
+        # Create temporary file
         try:
             temp_output = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
             output_path = temp_output.name
         frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
         fps = int(cap.get(cv2.CAP_PROP_FPS))
+        if fps == 0:
             fps = 30
         renderer = AnimationRenderer(fps=fps)
         # Create temporary file for processed video
         temp_output = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
         output_path = temp_output.name
+        # Initialize video writer
+        max_dimension = 480
         if frame_width > max_dimension or frame_height > max_dimension:
             scale = min(max_dimension / frame_width, max_dimension / frame_height)
             frame_width = int(frame_width * scale)
             frame_height = int(frame_height * scale)
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
         out = cv2.VideoWriter(output_path, fourcc, min(fps, 30), (frame_width, frame_height))
                 frame = cv2.resize(frame, (frame_width, frame_height))
             try:
                 fh, fw = frame.shape[:2]
                 if fw > 640:
                     scale = 640 / fw
                     frame = cv2.resize(frame, (640, int(fh * scale)))
+                # Process frame
                 retries = 3
+                landmarks, annotated_frame = None, None
                 while retries > 0:
                     landmarks, annotated_frame = pose_detector.detect_video_frame(frame)
                     if landmarks is not None:
                         break
                     retries -= 1
+                # Write to output
                 if annotated_frame is not None:
                     out.write(annotated_frame)
                 else:
                         renderer.add_keyframe(landmarks, pose_detector.pose_connections, frame_time)
                     except Exception as e:
                         print(f"Frame {frame_count} skeleton generation error: {str(e)}")
+                        # Если возникла ошибка, используем последний корректный кадр
                         if animation_frames:
                             animation_frames.append(animation_frames[-1])
+                else:
+                    # Нет новых landmarks, дублируем предыдущий, если есть
+                    if animation_frames:
+                        animation_frames.append(animation_frames[-1])
             except Exception as e:
                 print(f"Frame {frame_count} processing error: {str(e)}")
             frame_count += 1
             frame_time = frame_count / fps
+            if frame_count > 1000:  # Safety limit
                 break
         if cap is not None:
             cap.release()
         if out is not None:
             out.release()
+        # Convert output video to x264
         converted_output = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
         os.system(f'ffmpeg -y -i {output_path} -vcodec libx264 -preset ultrafast -pix_fmt yuv420p {converted_output.name}')
+        os.unlink(output_path)
         return converted_output.name, animation_frames
 def process_gif(gif_path, pose_detector, skeleton_generator):
     """
     Process GIF for pose detection and skeleton generation.
+    Uses Pillow to extract frames, processes each frame,
     and creates a temporary MP4 video with the processed frames.
     """
     try:
         from PIL import Image, ImageSequence
         gif = Image.open(gif_path)
         frames = []
         for frame in ImageSequence.Iterator(gif):
             frame = frame.convert("RGB")
             frame_np = np.array(frame)
             frame_cv = cv2.cvtColor(frame_np, cv2.COLOR_RGB2BGR)
             frames.append(frame_cv)
         processed_frames = []
         animation_frames = []
         for frame in frames:
             landmarks, annotated_frame = pose_detector.detect_video_frame(frame)
             if annotated_frame is None:
                 annotated_frame = frame
             processed_frames.append(annotated_frame)
             if landmarks is not None:
                 skeleton_data = skeleton_generator.generate_skeleton(landmarks)
             else:
+                # Если не удалось получить новые landmarks, берём предыдущий скелет
                 skeleton_data = animation_frames[-1] if animation_frames else {}
             animation_frames.append(skeleton_data)
+        # Собираем обработанные кадры в MP4
         height, width = processed_frames[0].shape[:2]
+        fps = 10
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
         temp_video = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
         out = cv2.VideoWriter(temp_video.name, fourcc, fps, (width, height))
 def process_video_upload(uploaded_file, components, processed_file, db, is_gif, col1, col2):
     """
     Handle video/GIF file upload processing.
+    Shows the original file in the left column and processed MP4 in the right column.
     """
     pose_detector, skeleton_generator, animation_exporter = components
     # Считываем байты файла
     file_bytes = uploaded_file.read()
+    # В зависимости от того, GIF это или нет,
+    # в "Original" показываем либо st.image (для GIF), либо st.video (для обычного видео).
+    with col1:
+        if is_gif:
+            st.image(file_bytes, use_column_width=True)
+        else:
+            st.video(file_bytes)
+    # Сохраняем во временный файл для дальнейшей обработки
+    temp_input = tempfile.NamedTemporaryFile(
+        suffix=('.gif' if is_gif else '.mp4'), delete=False
+    )
     temp_input.write(file_bytes)
     temp_input.seek(0)
     video_path = temp_input.name
     if is_gif:
         processed_video_path, animation_frames = process_gif(video_path, pose_detector, skeleton_generator)
     else:
         processed_video_path, animation_frames = process_video(video_path, pose_detector, skeleton_generator)
     if not animation_frames:
         raise ValueError("No poses detected in the video/gif")
+    # Сохраняем данные в БД
     save_video_data(db, processed_file.id, animation_frames)
     animation_data_binary = animation_exporter.export_animation(animation_frames)
+    # Показываем результат (MP4) в правой колонке
     with col2:
         if processed_video_path:
             with open(processed_video_path, "rb") as f:
 def save_video_data(db, file_id: int, animation_frames: list):
     from database import PoseData
     for frame_num, frame_data in enumerate(animation_frames):
+        # frame_data может быть пустым словарём, если не удалось получить landmarks
+        if not frame_data:
+            frame_data = {}
         pose_data = PoseData(file_id=file_id, frame_number=frame_num, landmarks=frame_data)
         db.add(pose_data)
     db.commit()