Upload 23 files

- scripts/burn_subtitles.py +1 -1
- scripts/create_viral_segments.py +50 -2
- scripts/edit_video.py +70 -21
- scripts/one_face.py +16 -13
- scripts/two_face.py +6 -5
scripts/burn_subtitles.py
CHANGED

@@ -20,7 +20,7 @@ def burn_video_file(video_path, subtitle_path, output_path):
         '-vf', f"subtitles='{subtitle_file_ffmpeg}'",
         '-c:v', encoder,
         '-preset', preset,
-        '-b:v', '
+        '-b:v', '15M',
         '-pix_fmt', 'yuv420p',
         '-c:a', 'copy',
         output_path
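
For context, the assembled ffmpeg invocation now looks roughly like this. A sketch only: the '-i' input handling and the placeholder values are assumptions, and only the flags visible in the hunk are confirmed.

# Placeholders standing in for values computed elsewhere in burn_video_file (assumed).
input_path, output_path = "in.mp4", "out.mp4"
subtitle_file_ffmpeg = "subs.srt"
encoder, preset = "libx264", "ultrafast"

cmd = [
    'ffmpeg', '-y', '-i', input_path,
    '-vf', f"subtitles='{subtitle_file_ffmpeg}'",
    '-c:v', encoder,
    '-preset', preset,
    '-b:v', '15M',         # fixed 15 Mb/s video bitrate introduced by this commit
    '-pix_fmt', 'yuv420p',
    '-c:a', 'copy',        # audio is passed through untouched
    output_path,
]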
scripts/create_viral_segments.py
CHANGED

@@ -4,6 +4,15 @@ import re
 import sys
 import time
 import ast
+import io
+
+# Configure stdout to avoid encoding errors on Windows (replaces invalid characters with ?)
+if sys.stdout and hasattr(sys.stdout, 'buffer'):
+    try:
+        # Keep the original encoding but ignore errors (replace with ?)
+        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding=sys.stdout.encoding or 'utf-8', errors='replace', line_buffering=True)
+    except:
+        pass
 
 # Optionally try to import the AI libraries
 try:

@@ -27,7 +36,9 @@ except ImportError:
 def clean_json_response(response_text):
     """
     Cleans the response, focusing on finding the JSON object that contains the "segments" key.
     Strategy:
+    1. Search for the word "segments", find the preceding '{' and use raw_decode.
+    2. Fallback: parse the segment list item by item (recovery of truncated JSON).
     """
     if not isinstance(response_text, str):
         response_text = str(response_text)

@@ -120,6 +131,40 @@ def clean_json_response(response_text):
         return json.loads(match.group(1))
     except:
         pass
+
+    # 4. LAST RESORT: fragment parser (for truncated/incomplete JSON)
+    # Look for "segments": [ and try to parse item by item
+    try:
+        match_list = re.search(r'"segments"\s*:\s*\[', response_text)
+        if match_list:
+            start_pos = match_list.end()
+            current_pos = start_pos
+            found_segments = []
+            decoder = json.JSONDecoder()
+
+            while True:
+                while current_pos < len(response_text) and response_text[current_pos] in ' \t\n\r,':
+                    current_pos += 1
+
+                if current_pos >= len(response_text):
+                    break
+
+                if response_text[current_pos] == ']':
+                    break
+
+                try:
+                    obj, end_pos = decoder.raw_decode(response_text[current_pos:])
+                    if isinstance(obj, dict):
+                        found_segments.append(obj)
+                    current_pos += end_pos
+                except json.JSONDecodeError:
+                    break
+
+            if found_segments:
+                print(f"[INFO] Recuperado {len(found_segments)} segmentos de JSON truncado.")
+                return {"segments": found_segments}
+    except:
+        pass
 
     return {"segments": []}

@@ -217,6 +262,9 @@ def call_g4f(prompt, model_name="gpt-4o-mini"):
                 time.sleep(base_wait)
                 continue
 
+            if isinstance(response, str):
+                return response
+
             try:
                 return json.dumps(response, ensure_ascii=False)
             except:

@@ -225,7 +273,7 @@ def call_g4f(prompt, model_name="gpt-4o-mini"):
         except Exception as e:
             print(f"[WARN] Erro na API do G4F (Tentativa {attempt+1}/{max_retries}): {e}")
             if attempt < max_retries - 1:
-                wait_time = base_wait * (
+                wait_time = base_wait * (2 ** attempt)
                 time.sleep(wait_time)
 
     print(f"Falha crítica após {max_retries} tentativas no G4F.")
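
The new last-resort branch leans on json.JSONDecoder.raw_decode, which parses one JSON value from the start of a string and reports how many characters it consumed, so complete objects can be salvaged from a response cut off mid-stream. A standalone sketch of the same recovery idea, with an invented truncated payload:

import json, re

truncated = '{"segments": [{"start": 0, "end": 12}, {"start": 30, "end'  # cut mid-object
decoder = json.JSONDecoder()
segments = []
pos = re.search(r'"segments"\s*:\s*\[', truncated).end()
while pos < len(truncated):
    while pos < len(truncated) and truncated[pos] in ' \t\n\r,':
        pos += 1  # skip separators between array items
    try:
        obj, consumed = decoder.raw_decode(truncated[pos:])
    except json.JSONDecodeError:
        break  # hit the truncation point; keep what we have
    segments.append(obj)
    pos += consumed
print(segments)  # [{'start': 0, 'end': 12}] -- the one complete object survives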
scripts/edit_video.py
CHANGED

@@ -54,6 +54,24 @@ def get_best_encoder():
     CACHED_ENCODER = ("libx264", "ultrafast")
     return CACHED_ENCODER
 
+def get_target_resolution(width, height):
+    """
+    Calculate target 9:16 resolution based on input size.
+    Preserves 4K height if available.
+    """
+    # Use max of 1920 or input height to avoid downscaling 4K content
+    # If input is 4K (H=2160), use 2160.
+    target_h = max(1920, height)
+
+    # Ensure divisible by 2
+    if target_h % 2 != 0: target_h -= 1
+
+    # Calculate width for 9:16
+    target_w = int(target_h * 9 / 16)
+    if target_w % 2 != 0: target_w -= 1
+
+    return target_w, target_h
+
 def get_center_bbox(bbox):
     # bbox: [x1, y1, x2, y2]
     return ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
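A quick sanity check of the new helper (a standalone sketch; the body is copied from the hunk above). Note that the width argument is currently unused, the canvas is derived from height alone:

def get_target_resolution(width, height):  # copied from the hunk above
    target_h = max(1920, height)
    if target_h % 2 != 0: target_h -= 1
    target_w = int(target_h * 9 / 16)
    if target_w % 2 != 0: target_w -= 1
    return target_w, target_h

print(get_target_resolution(1920, 1080))  # (1080, 1920): HD sources keep the standard portrait canvas
print(get_target_resolution(3840, 2160))  # (1214, 2160): 4K height kept; 2160 * 9/16 = 1215, floored to even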
@@ -107,9 +125,8 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
     # Target dimensions (9:16)
-    target_height = 1920
+    target_width, target_height = get_target_resolution(width, height)
+    print(f"Target Resolution: {target_width}x{target_height}")
 
     encoder_name, encoder_preset = get_best_encoder()
 

@@ -130,8 +147,8 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
 
     # If using hardware encoder, we might want to set bitrate to ensure quality
     if "nvenc" in encoder_name or "amf" in encoder_name:
-        ffmpeg_cmd.extend(["-b:v", "
+        ffmpeg_cmd.extend(["-b:v", "15M"])
+
     process = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)
 
     while True:

@@ -140,9 +157,9 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
             break
 
         if no_face_mode == "zoom":
-            result = crop_center_zoom(frame)
+            result = crop_center_zoom(frame, (target_width, target_height))
        else:
-            result = resize_with_padding(frame)
+            result = resize_with_padding(frame, (target_width, target_height))
 
         try:
             # Write raw bytes to ffmpeg stdin

@@ -172,7 +189,7 @@ def finalize_video(input_file, output_file, index, fps, project_folder, final_fo
         "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats",
         "-i", output_file,
         "-i", audio_file,
-        "-c:v", encoder_name, "-preset", encoder_preset, "-b:v", "
+        "-c:v", encoder_name, "-preset", encoder_preset, "-b:v", "15M",
         "-c:a", "aac", "-b:a", "192k",
         "-r", str(fps),
         final_output

@@ -234,8 +251,10 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
+    target_width, target_height = get_target_resolution(frame_width, frame_height)
+
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_file, fourcc, fps, (
+    out = cv2.VideoWriter(output_file, fourcc, fps, (target_width, target_height))
 
     next_detection_frame = 0
     current_interval = int(5 * fps) # Initial guess

@@ -335,9 +354,9 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
             current_faces = last_detected_faces
         else:
             if no_face_mode == "zoom":
-                result = crop_center_zoom(frame)
+                result = crop_center_zoom(frame, (target_width, target_height))
             else:
-                result = resize_with_padding(frame)
+                result = resize_with_padding(frame, (target_width, target_height))
             coordinate_log.append({"frame": frame_index, "faces": []})
             out.write(result)
             continue

@@ -345,18 +364,18 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
         last_frame_face_positions = current_faces
 
         if hasattr(current_faces, '__len__') and len(current_faces) == 2:
-            result = crop_and_resize_two_faces(frame, current_faces)
+            result = crop_and_resize_two_faces(frame, current_faces, target_size=(target_width, target_height))
         else:
             # Ensure it's list of tuples or single tuple? current_faces is list of tuples from detection
             # If 1 face: [ (x,y,w,h) ]
             if hasattr(current_faces, '__len__') and len(current_faces) > 0:
                 f = current_faces[0]
-                result = crop_and_resize_single_face(frame, f)
+                result = crop_and_resize_single_face(frame, f, target_size=(target_width, target_height))
             else:
                 if no_face_mode == "zoom":
-                    result = crop_center_zoom(frame)
+                    result = crop_center_zoom(frame, (target_width, target_height))
                 else:
-                    result = resize_with_padding(frame)
+                    result = resize_with_padding(frame, (target_width, target_height))
 
         out.write(result)

@@ -388,9 +407,13 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
 
     fps = cap.get(cv2.CAP_PROP_FPS)
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+    target_width, target_height = get_target_resolution(frame_width, frame_height)
 
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_file, fourcc, fps, (
+    out = cv2.VideoWriter(output_file, fourcc, fps, (target_width, target_height))
 
     # Logic copied from generate_short_mediapipe
     detection_interval = int(2 * fps) # Default check every 2 seconds

@@ -448,9 +471,9 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
         else:
             # No face detected for a while -> Center/Padding fallback
             if no_face_mode == "zoom":
-                result = crop_center_zoom(frame)
+                result = crop_center_zoom(frame, (target_width, target_height))
             else:
-                result = resize_with_padding(frame)
+                result = resize_with_padding(frame, (target_width, target_height))
             out.write(result)
             continue

@@ -462,7 +485,7 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
         else:
             face_bbox = current_faces # Should be handled
 
-        result = crop_and_resize_single_face(frame, face_bbox)
+        result = crop_and_resize_single_face(frame, face_bbox, target_size=(target_width, target_height))
         out.write(result)
 
     cap.release()

@@ -488,9 +511,12 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
+    target_width, target_height = get_target_resolution(frame_width, frame_height)
+    print(f"Target Resolution: {target_width}x{target_height}")
+
     # Using mp4v for container, but final mux will fix encoding
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_file, fourcc, fps, (
+    out = cv2.VideoWriter(output_file, fourcc, fps, (target_width, target_height))
 
     # Dynamic Interval Logic
     next_detection_frame = 0
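
Aside: the target size has to be threaded into every helper because cv2.VideoWriter only produces usable output when each written frame matches the (width, height) the writer was opened with; mismatches typically fail silently rather than raising. A minimal sketch of that invariant, with illustrative sizes:

import cv2
import numpy as np

w, h = 1080, 1920  # must match every frame written below
out = cv2.VideoWriter("demo.mp4", cv2.VideoWriter_fourcc(*'mp4v'), 30.0, (w, h))
frame = np.zeros((h, w, 3), dtype=np.uint8)  # NumPy shape is (height, width, channels)
out.write(frame)  # OK: frame size matches the writer's size
out.release()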
@@ -608,6 +634,22 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
                 is_talking = 1.0 if mar > active_speaker_mar else 0.0
 
 
+        # --- CROWD MODE LOGIC ---
+        # If too many faces, don't even try to track. Fallback to No-Face logic (Zoom/Padding)
+        CROWD_THRESHOLD = 7
+        # FIX: Use last_raw_faces (before size filtering) so we count background people too!
+        is_crowd = len(last_raw_faces) >= CROWD_THRESHOLD
+        if is_crowd:
+            print(f"DEBUG: Crowd Mode Active! {len(faces)} faces >= {CROWD_THRESHOLD}. Triggering Fallback (No Face Mode).")
+            faces = []
+            valid_faces = []  # CAUTION: Must clear strict backup too!
+            # FORCE RESET HISTORY so it doesn't "stick" to the last face found
+            last_detected_faces = None
+            transition_frames = []
+            faces_activity_state = []
+            zoom_ema_bbox = None  # Reset smoothing too
+        # ---------------------------
+
         # Update Activity State - Two Pass for Global Motion Compensation
         if focus_active_speaker and faces:
             # Pass 1: Global Motion (Camera Shake) Calculation

@@ -763,7 +805,8 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
         # -----------------------------
 
         # Fallback Lookahead: If detection fails or partial
-        if
+        # But DO NOT look ahead if we are in Crowd Mode (we explicitly wanted 0 faces)
+        if len(faces) < target_faces and not is_crowd:
             # Try 1 frame ahead
             ret2, frame2 = cap.read()
             if ret2 and frame2 is not None:

@@ -949,6 +992,12 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
             else:
                 result = resize_with_padding(frame)
             out.write(result)
+            timeline_frames.append((frame_index, "1"))  # Fix: Ensure fallback is treated as single face for subs
+
+            # Fix XML Log sync (Empty faces for fallback)
+            coords_entry = {"frame": frame_index, "src_size": [frame_width, frame_height], "faces": []}
+            coordinate_log.append(coords_entry)
+
             continue
 
         last_frame_face_positions = current_faces
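A standalone restatement of the crowd gate above, for clarity (the threshold and the use of raw, pre-filter detections mirror the diff; the face tuples are invented):

CROWD_THRESHOLD = 7

def is_crowd_shot(raw_faces):
    # Count raw detections (before size filtering) so background faces
    # that the tracker would ignore still push the scene into crowd mode.
    return len(raw_faces) >= CROWD_THRESHOLD

print(is_crowd_shot([(0, 0, 10, 10)] * 8))  # True -> fall back to zoom/padding
print(is_crowd_shot([(0, 0, 10, 10)] * 2))  # False -> normal face tracking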
scripts/one_face.py
CHANGED

@@ -4,15 +4,16 @@ import os
 import subprocess
 import mediapipe as mp
 
-def crop_and_resize_single_face(frame, face):
+def crop_and_resize_single_face(frame, face, target_size=(1080, 1920)):
     frame_height, frame_width = frame.shape[:2]
+    target_w, target_h = target_size
 
     x, y, w, h = face
     face_center_x = x + w // 2
     face_center_y = y + h // 2
 
     # Compute the desired aspect ratio
-    target_aspect_ratio =
+    target_aspect_ratio = target_w / target_h
 
     # Compute the crop area to avoid black bars
     if frame_width / frame_height > target_aspect_ratio:

@@ -28,15 +29,16 @@ def crop_and_resize_single_face(frame, face):
     crop_x2 = crop_x + new_width
     crop_y2 = crop_y + new_height
 
     # Crop and resize
     crop_img = frame[crop_y:crop_y2, crop_x:crop_x2]
-    resized = cv2.resize(crop_img,
+    resized = cv2.resize(crop_img, target_size, interpolation=cv2.INTER_AREA)
 
     return resized
 
-def resize_with_padding(frame):
+def resize_with_padding(frame, target_size=(1080, 1920)):
     frame_height, frame_width = frame.shape[:2]
+    target_w, target_h = target_size
+    target_aspect_ratio = target_w / target_h
 
     if frame_width / frame_height > target_aspect_ratio:
         new_width = frame_width

@@ -56,7 +58,7 @@ def resize_with_padding(frame):
     result[pad_top:pad_top+frame_height, pad_left:pad_left+frame_width] = frame
 
     # Resize to the final dimensions
-    return cv2.resize(result,
+    return cv2.resize(result, target_size, interpolation=cv2.INTER_AREA)
 
 def detect_face_or_body(frame, face_detection, face_mesh, pose):
     # Convert the image to RGB

@@ -108,12 +110,13 @@ def detect_face_or_body(frame, face_detection, face_mesh, pose):
     return detections if detections else None
 
 
-def crop_center_zoom(frame):
+def crop_center_zoom(frame, target_size=(1080, 1920)):
     """
-    Crops the center of the frame to fill
+    Crops the center of the frame to fill target ratio (Zoom effect).
     """
     frame_height, frame_width = frame.shape[:2]
+    target_w, target_h = target_size
+    target_aspect_ratio = target_w / target_h
 
     # Calculate crop dimensions to FILL the target ratio
     if frame_width / frame_height > target_aspect_ratio:

@@ -134,6 +137,6 @@ def crop_center_zoom(frame):
 
     crop_img = frame[start_y:start_y+new_height, start_x:start_x+new_width]
 
-    # Resize to final
-    return cv2.resize(crop_img,
+    # Resize to final dimensions
+    return cv2.resize(crop_img, target_size, interpolation=cv2.INTER_AREA)
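To make the crop geometry concrete: for a 1920x1080 landscape frame and the default 1080x1920 portrait target, the frame is wider than the target ratio, so the crop keeps the full height and takes a centered 9:16 slice of the width. A hand-worked sketch of that branch (variable names follow the functions above; the exact interior lines are truncated in this view):

frame_w, frame_h = 1920, 1080
target_w, target_h = 1080, 1920
target_aspect_ratio = target_w / target_h       # 0.5625, i.e. 9:16

new_height = frame_h                             # keep full height: 1080
new_width = int(frame_h * target_aspect_ratio)   # 607
start_x = (frame_w - new_width) // 2             # 656, centered horizontally
print(new_width, new_height, start_x)            # 607 1080 656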
scripts/two_face.py
CHANGED

@@ -78,18 +78,19 @@ def crop_and_maintain_ar(frame, face_box, target_w, target_h, zoom_out_factor=2.
     resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
     return resized
 
-def crop_and_resize_two_faces(frame, face_positions, zoom_out_factor=2.2):
+def crop_and_resize_two_faces(frame, face_positions, target_size=(1080, 1920), zoom_out_factor=2.2):
     """
     Crops and resizes two faces detected in the frame, arranging them into a vertical composition
+    where each face occupies half of the screen.
     """
     # Target dimensions for each half
+    final_w, final_h = target_size
+    target_w = final_w
+    target_h = final_h // 2
 
     # If we don't have 2 faces, fall back (safety)
     if len(face_positions) < 2:
-        return np.zeros((
+        return np.zeros((final_h, final_w, 3), dtype=np.uint8)
 
     # First face (top)
     face1_img = crop_and_maintain_ar(frame, face_positions[0], target_w, target_h, zoom_out_factor)
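
Each half is final_w wide and final_h // 2 tall, so the two crops stack vertically into the full canvas. The composition step falls outside this hunk; a plausible sketch of it, assuming NumPy vertical stacking and using dummy contents:

import numpy as np

final_w, final_h = 1080, 1920
half_h = final_h // 2

# Stand-ins for the two halves returned by crop_and_maintain_ar.
face1_img = np.zeros((half_h, final_w, 3), dtype=np.uint8)
face2_img = np.zeros((half_h, final_w, 3), dtype=np.uint8)

combined = np.vstack((face1_img, face2_img))
print(combined.shape)  # (1920, 1080, 3) -- fills the 9:16 canvas exactly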
|