Spaces:

RafaG
/

ViralCutterPRO

Sleeping

App Files Community

RafaG committed 16 days ago

Commit

1496c35

verified ·

1 Parent(s): 52b7b7a

Upload 24 files

Browse files

Files changed (4) hide show

scripts/burn_subtitles.py +1 -1
scripts/edit_video.py +20 -46
scripts/one_face.py +13 -16
scripts/two_face.py +5 -6

scripts/burn_subtitles.py CHANGED Viewed

@@ -20,7 +20,7 @@ def burn_video_file(video_path, subtitle_path, output_path):
             '-vf', f"subtitles='{subtitle_file_ffmpeg}'",
             '-c:v', encoder,
             '-preset', preset,
-            '-b:v', '15M',
             '-pix_fmt', 'yuv420p',
             '-c:a', 'copy',
             output_path

             '-vf', f"subtitles='{subtitle_file_ffmpeg}'",
             '-c:v', encoder,
             '-preset', preset,
+            '-b:v', '5M',
             '-pix_fmt', 'yuv420p',
             '-c:a', 'copy',
             output_path

scripts/edit_video.py CHANGED Viewed

@@ -54,24 +54,6 @@ def get_best_encoder():
     CACHED_ENCODER = ("libx264", "ultrafast")
     return CACHED_ENCODER
-def get_target_resolution(width, height):
-    """
-    Calculate target 9:16 resolution based on input size.
-    Preserves 4K height if available.
-    """
-    # Use max of 1920 or input height to avoid downscaling 4K content
-    # If input is 4K (H=2160), use 2160.
-    target_h = max(1920, height)
-    # Ensure divisible by 2
-    if target_h % 2 != 0: target_h -= 1
-    # Calculate width for 9:16
-    target_w = int(target_h * 9 / 16)
-    if target_w % 2 != 0: target_w -= 1
-    return target_w, target_h
 def get_center_bbox(bbox):
     # bbox: [x1, y1, x2, y2]
     return ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
@@ -125,8 +107,9 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     # Target dimensions (9:16)
-    target_width, target_height = get_target_resolution(width, height)
-    print(f"Target Resolution: {target_width}x{target_height}")
     encoder_name, encoder_preset = get_best_encoder()
@@ -147,8 +130,8 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
     # If using hardware encoder, we might want to set bitrate to ensure quality
     if "nvenc" in encoder_name or "amf" in encoder_name:
-         ffmpeg_cmd.extend(["-b:v", "15M"])
     process = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)
     while True:
@@ -157,9 +140,9 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
             break
         if no_face_mode == "zoom":
-             result = crop_center_zoom(frame, (target_width, target_height))
         else:
-             result = resize_with_padding(frame, (target_width, target_height))
         try:
             # Write raw bytes to ffmpeg stdin
@@ -189,7 +172,7 @@ def finalize_video(input_file, output_file, index, fps, project_folder, final_fo
             "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats",
             "-i", output_file,
             "-i", audio_file,
-            "-c:v", encoder_name, "-preset", encoder_preset, "-b:v", "15M",
             "-c:a", "aac", "-b:a", "192k",
             "-r", str(fps),
             final_output
@@ -251,10 +234,8 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
         frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        target_width, target_height = get_target_resolution(frame_width, frame_height)
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        out = cv2.VideoWriter(output_file, fourcc, fps, (target_width, target_height))
         next_detection_frame = 0
         current_interval = int(5 * fps) # Initial guess
@@ -354,9 +335,9 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
                 current_faces = last_detected_faces
             else:
                 if no_face_mode == "zoom":
-                    result = crop_center_zoom(frame, (target_width, target_height))
                 else:
-                    result = resize_with_padding(frame, (target_width, target_height))
                 coordinate_log.append({"frame": frame_index, "faces": []})
                 out.write(result)
                 continue
@@ -364,18 +345,18 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
             last_frame_face_positions = current_faces
             if hasattr(current_faces, '__len__') and len(current_faces) == 2:
-                 result = crop_and_resize_two_faces(frame, current_faces, target_size=(target_width, target_height))
             else:
                  # Ensure it's list of tuples or single tuple? current_faces is list of tuples from detection
                  # If 1 face: [ (x,y,w,h) ]
                  if hasattr(current_faces, '__len__') and len(current_faces) > 0:
                      f = current_faces[0]
-                     result = crop_and_resize_single_face(frame, f, target_size=(target_width, target_height))
                  else:
                      if no_face_mode == "zoom":
-                         result = crop_center_zoom(frame, (target_width, target_height))
                      else:
-                         result = resize_with_padding(frame, (target_width, target_height))
             out.write(result)
@@ -407,13 +388,9 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
     fps = cap.get(cv2.CAP_PROP_FPS)
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    target_width, target_height = get_target_resolution(frame_width, frame_height)
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_file, fourcc, fps, (target_width, target_height))
     # Logic copied from generate_short_mediapipe
     detection_interval = int(2 * fps) # Default check every 2 seconds
@@ -471,9 +448,9 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
         else:
             # No face detected for a while -> Center/Padding fallback
             if no_face_mode == "zoom":
-                result = crop_center_zoom(frame, (target_width, target_height))
             else:
-                result = resize_with_padding(frame, (target_width, target_height))
             out.write(result)
             continue
@@ -485,7 +462,7 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
         else:
              face_bbox = current_faces # Should be handled
-        result = crop_and_resize_single_face(frame, face_bbox, target_size=(target_width, target_height))
         out.write(result)
     cap.release()
@@ -511,12 +488,9 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    target_width, target_height = get_target_resolution(frame_width, frame_height)
-    print(f"Target Resolution: {target_width}x{target_height}")
     # Using mp4v for container, but final mux will fix encoding
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_file, fourcc, fps, (target_width, target_height))
     # Dynamic Interval Logic
     next_detection_frame = 0

     CACHED_ENCODER = ("libx264", "ultrafast")
     return CACHED_ENCODER
 def get_center_bbox(bbox):
     # bbox: [x1, y1, x2, y2]
     return ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     # Target dimensions (9:16)
+    target_width = 1080
+    target_height = 1920
     encoder_name, encoder_preset = get_best_encoder()
     # If using hardware encoder, we might want to set bitrate to ensure quality
     if "nvenc" in encoder_name or "amf" in encoder_name:
+         ffmpeg_cmd.extend(["-b:v", "5M"])
     process = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)
     while True:
             break
         if no_face_mode == "zoom":
+             result = crop_center_zoom(frame)
         else:
+             result = resize_with_padding(frame)
         try:
             # Write raw bytes to ffmpeg stdin
             "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats",
             "-i", output_file,
             "-i", audio_file,
+            "-c:v", encoder_name, "-preset", encoder_preset, "-b:v", "5M",
             "-c:a", "aac", "-b:a", "192k",
             "-r", str(fps),
             final_output
         frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        out = cv2.VideoWriter(output_file, fourcc, fps, (1080, 1920))
         next_detection_frame = 0
         current_interval = int(5 * fps) # Initial guess
                 current_faces = last_detected_faces
             else:
                 if no_face_mode == "zoom":
+                    result = crop_center_zoom(frame)
                 else:
+                    result = resize_with_padding(frame)
                 coordinate_log.append({"frame": frame_index, "faces": []})
                 out.write(result)
                 continue
             last_frame_face_positions = current_faces
             if hasattr(current_faces, '__len__') and len(current_faces) == 2:
+                 result = crop_and_resize_two_faces(frame, current_faces)
             else:
                  # Ensure it's list of tuples or single tuple? current_faces is list of tuples from detection
                  # If 1 face: [ (x,y,w,h) ]
                  if hasattr(current_faces, '__len__') and len(current_faces) > 0:
                      f = current_faces[0]
+                     result = crop_and_resize_single_face(frame, f)
                  else:
                      if no_face_mode == "zoom":
+                         result = crop_center_zoom(frame)
                      else:
+                         result = resize_with_padding(frame)
             out.write(result)
     fps = cap.get(cv2.CAP_PROP_FPS)
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    out = cv2.VideoWriter(output_file, fourcc, fps, (1080, 1920))
     # Logic copied from generate_short_mediapipe
     detection_interval = int(2 * fps) # Default check every 2 seconds
         else:
             # No face detected for a while -> Center/Padding fallback
             if no_face_mode == "zoom":
+                result = crop_center_zoom(frame)
             else:
+                result = resize_with_padding(frame)
             out.write(result)
             continue
         else:
              face_bbox = current_faces # Should be handled
+        result = crop_and_resize_single_face(frame, face_bbox)
         out.write(result)
     cap.release()
     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     # Using mp4v for container, but final mux will fix encoding
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    out = cv2.VideoWriter(output_file, fourcc, fps, (1080, 1920))
     # Dynamic Interval Logic
     next_detection_frame = 0

scripts/one_face.py CHANGED Viewed

@@ -4,16 +4,15 @@ import os
 import subprocess
 import mediapipe as mp
-def crop_and_resize_single_face(frame, face, target_size=(1080, 1920)):
         frame_height, frame_width = frame.shape[:2]
-        target_w, target_h = target_size
         x, y, w, h = face
         face_center_x = x + w // 2
         face_center_y = y + h // 2
-        # Cálculo da proporção desejada
-        target_aspect_ratio = target_w / target_h
         # Cálculo da área de corte para evitar barras pretas
         if frame_width / frame_height > target_aspect_ratio:
@@ -29,16 +28,15 @@ def crop_and_resize_single_face(frame, face, target_size=(1080, 1920)):
         crop_x2 = crop_x + new_width
         crop_y2 = crop_y + new_height
-        # Recorte e redimensionamento
         crop_img = frame[crop_y:crop_y2, crop_x:crop_x2]
-        resized = cv2.resize(crop_img, target_size, interpolation=cv2.INTER_AREA)
         return resized
-def resize_with_padding(frame, target_size=(1080, 1920)):
         frame_height, frame_width = frame.shape[:2]
-        target_w, target_h = target_size
-        target_aspect_ratio = target_w / target_h
         if frame_width / frame_height > target_aspect_ratio:
             new_width = frame_width
@@ -58,7 +56,7 @@ def resize_with_padding(frame, target_size=(1080, 1920)):
         result[pad_top:pad_top+frame_height, pad_left:pad_left+frame_width] = frame
         # Redimensionar para as dimensões finais
-        return cv2.resize(result, target_size, interpolation=cv2.INTER_AREA)
 def detect_face_or_body(frame, face_detection, face_mesh, pose):
     # Converter a imagem para RGB
@@ -110,13 +108,12 @@ def detect_face_or_body(frame, face_detection, face_mesh, pose):
     return detections if detections else None
-def crop_center_zoom(frame, target_size=(1080, 1920)):
     """
-    Crops the center of the frame to fill target ratio (Zoom effect).
     """
     frame_height, frame_width = frame.shape[:2]
-    target_w, target_h = target_size
-    target_aspect_ratio = target_w / target_h
     # Calculate crop dimensions to FILL the target ratio
     if frame_width / frame_height > target_aspect_ratio:
@@ -137,6 +134,6 @@ def crop_center_zoom(frame, target_size=(1080, 1920)):
     crop_img = frame[start_y:start_y+new_height, start_x:start_x+new_width]
-    # Resize to final dimensions
-    return cv2.resize(crop_img, target_size, interpolation=cv2.INTER_AREA)

 import subprocess
 import mediapipe as mp
+def crop_and_resize_single_face(frame, face):
         frame_height, frame_width = frame.shape[:2]
         x, y, w, h = face
         face_center_x = x + w // 2
         face_center_y = y + h // 2
+        # Cálculo da proporção desejada (9:16)
+        target_aspect_ratio = 9 / 16
         # Cálculo da área de corte para evitar barras pretas
         if frame_width / frame_height > target_aspect_ratio:
         crop_x2 = crop_x + new_width
         crop_y2 = crop_y + new_height
+        # Recorte e redimensionamento para 1080x1920 (9:16)
         crop_img = frame[crop_y:crop_y2, crop_x:crop_x2]
+        resized = cv2.resize(crop_img, (1080, 1920), interpolation=cv2.INTER_AREA)
         return resized
+def resize_with_padding(frame):
         frame_height, frame_width = frame.shape[:2]
+        target_aspect_ratio = 9 / 16
         if frame_width / frame_height > target_aspect_ratio:
             new_width = frame_width
         result[pad_top:pad_top+frame_height, pad_left:pad_left+frame_width] = frame
         # Redimensionar para as dimensões finais
+        return cv2.resize(result, (1080, 1920), interpolation=cv2.INTER_AREA)
 def detect_face_or_body(frame, face_detection, face_mesh, pose):
     # Converter a imagem para RGB
     return detections if detections else None
+def crop_center_zoom(frame):
     """
+    Crops the center of the frame to fill 9:16 aspect ratio (Zoom effect).
     """
     frame_height, frame_width = frame.shape[:2]
+    target_aspect_ratio = 9 / 16
     # Calculate crop dimensions to FILL the target ratio
     if frame_width / frame_height > target_aspect_ratio:
     crop_img = frame[start_y:start_y+new_height, start_x:start_x+new_width]
+    # Resize to final 1080x1920
+    return cv2.resize(crop_img, (1080, 1920), interpolation=cv2.INTER_AREA)

scripts/two_face.py CHANGED Viewed

@@ -78,19 +78,18 @@ def crop_and_maintain_ar(frame, face_box, target_w, target_h, zoom_out_factor=2.
     resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
     return resized
-def crop_and_resize_two_faces(frame, face_positions, target_size=(1080, 1920), zoom_out_factor=2.2):
     """
     Recorta e redimensiona dois rostos detectados no frame, ajustando para uma composição vertical
-    onde cada rosto ocupa metade da tela.
     """
     # Target dimensoes para cada metade
-    final_w, final_h = target_size
-    target_w = final_w
-    target_h = final_h // 2
     # Se não temos 2 faces, fallback (segurança)
     if len(face_positions) < 2:
-        return np.zeros((final_h, final_w, 3), dtype=np.uint8)
     # Primeiro rosto (Topo)
     face1_img = crop_and_maintain_ar(frame, face_positions[0], target_w, target_h, zoom_out_factor)

     resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
     return resized
+def crop_and_resize_two_faces(frame, face_positions, zoom_out_factor=2.2):
     """
     Recorta e redimensiona dois rostos detectados no frame, ajustando para uma composição vertical
+    1080x1920 onde cada rosto ocupa metade da tela (1080x960).
     """
     # Target dimensoes para cada metade
+    target_w = 1080
+    target_h = 960
     # Se não temos 2 faces, fallback (segurança)
     if len(face_positions) < 2:
+        return np.zeros((1920, 1080, 3), dtype=np.uint8)
     # Primeiro rosto (Topo)
     face1_img = crop_and_maintain_ar(frame, face_positions[0], target_w, target_h, zoom_out_factor)