Upload 23 files

- scripts/burn_subtitles.py +1 -1
- scripts/create_viral_segments.py +50 -2
- scripts/edit_video.py +70 -21
- scripts/one_face.py +16 -13
- scripts/two_face.py +6 -5
scripts/burn_subtitles.py
CHANGED

@@ -20,7 +20,7 @@ def burn_video_file(video_path, subtitle_path, output_path):
         '-vf', f"subtitles='{subtitle_file_ffmpeg}'",
         '-c:v', encoder,
         '-preset', preset,
-        '-b:v', '
+        '-b:v', '15M',
         '-pix_fmt', 'yuv420p',
         '-c:a', 'copy',
         output_path
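
For context, the assembled ffmpeg invocation now looks roughly like this. A sketch only: the '-i' input handling and the placeholder values are assumptions, and only the flags visible in the hunk are confirmed.

# Placeholders standing in for values computed elsewhere in burn_video_file (assumed).
input_path, output_path = "in.mp4", "out.mp4"
subtitle_file_ffmpeg = "subs.srt"
encoder, preset = "libx264", "ultrafast"

cmd = [
    'ffmpeg', '-y', '-i', input_path,
    '-vf', f"subtitles='{subtitle_file_ffmpeg}'",
    '-c:v', encoder,
    '-preset', preset,
    '-b:v', '15M',         # fixed 15 Mb/s video bitrate introduced by this commit
    '-pix_fmt', 'yuv420p',
    '-c:a', 'copy',        # audio is passed through untouched
    output_path,
]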
scripts/create_viral_segments.py
CHANGED

@@ -4,6 +4,15 @@ import re
 import sys
 import time
 import ast
+import io
+
+# Configure stdout to avoid encoding errors on Windows (replaces invalid characters with ?)
+if sys.stdout and hasattr(sys.stdout, 'buffer'):
+    try:
+        # Keep the original encoding but ignore errors (replace with ?)
+        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding=sys.stdout.encoding or 'utf-8', errors='replace', line_buffering=True)
+    except:
+        pass
 
 # Optionally try to import the AI libraries
 try:

@@ -27,7 +36,9 @@ except ImportError:
 def clean_json_response(response_text):
     """
     Cleans the response, focusing on finding the JSON object that contains the "segments" key.
     Strategy:
+    1. Search for the word "segments", find the preceding '{' and use raw_decode.
+    2. Fallback: parse the segment list item by item (recovery of truncated JSON).
     """
     if not isinstance(response_text, str):
         response_text = str(response_text)

@@ -120,6 +131,40 @@ def clean_json_response(response_text):
         return json.loads(match.group(1))
     except:
         pass
+
+    # 4. LAST RESORT: fragment parser (for truncated/incomplete JSON)
+    # Look for "segments": [ and try to parse item by item
+    try:
+        match_list = re.search(r'"segments"\s*:\s*\[', response_text)
+        if match_list:
+            start_pos = match_list.end()
+            current_pos = start_pos
+            found_segments = []
+            decoder = json.JSONDecoder()
+
+            while True:
+                while current_pos < len(response_text) and response_text[current_pos] in ' \t\n\r,':
+                    current_pos += 1
+
+                if current_pos >= len(response_text):
+                    break
+
+                if response_text[current_pos] == ']':
+                    break
+
+                try:
+                    obj, end_pos = decoder.raw_decode(response_text[current_pos:])
+                    if isinstance(obj, dict):
+                        found_segments.append(obj)
+                    current_pos += end_pos
+                except json.JSONDecodeError:
+                    break
+
+            if found_segments:
+                print(f"[INFO] Recuperado {len(found_segments)} segmentos de JSON truncado.")
+                return {"segments": found_segments}
+    except:
+        pass
 
     return {"segments": []}

@@ -217,6 +262,9 @@ def call_g4f(prompt, model_name="gpt-4o-mini"):
                 time.sleep(base_wait)
                 continue
 
+            if isinstance(response, str):
+                return response
+
             try:
                 return json.dumps(response, ensure_ascii=False)
             except:

@@ -225,7 +273,7 @@ def call_g4f(prompt, model_name="gpt-4o-mini"):
         except Exception as e:
             print(f"[WARN] Erro na API do G4F (Tentativa {attempt+1}/{max_retries}): {e}")
             if attempt < max_retries - 1:
-                wait_time = base_wait * (
+                wait_time = base_wait * (2 ** attempt)
                 time.sleep(wait_time)
 
     print(f"Falha crítica após {max_retries} tentativas no G4F.")
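
The new last-resort branch leans on json.JSONDecoder.raw_decode, which parses one JSON value from the start of a string and reports how many characters it consumed, so complete objects can be salvaged from a response cut off mid-stream. A standalone sketch of the same recovery idea, with an invented truncated payload:

import json, re

truncated = '{"segments": [{"start": 0, "end": 12}, {"start": 30, "end'  # cut mid-object
decoder = json.JSONDecoder()
segments = []
pos = re.search(r'"segments"\s*:\s*\[', truncated).end()
while pos < len(truncated):
    while pos < len(truncated) and truncated[pos] in ' \t\n\r,':
        pos += 1  # skip separators between array items
    try:
        obj, consumed = decoder.raw_decode(truncated[pos:])
    except json.JSONDecodeError:
        break  # hit the truncation point; keep what we have
    segments.append(obj)
    pos += consumed
print(segments)  # [{'start': 0, 'end': 12}] -- the one complete object survives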
scripts/edit_video.py
CHANGED

@@ -54,6 +54,24 @@ def get_best_encoder():
     CACHED_ENCODER = ("libx264", "ultrafast")
     return CACHED_ENCODER
 
+def get_target_resolution(width, height):
+    """
+    Calculate target 9:16 resolution based on input size.
+    Preserves 4K height if available.
+    """
+    # Use max of 1920 or input height to avoid downscaling 4K content
+    # If input is 4K (H=2160), use 2160.
+    target_h = max(1920, height)
+
+    # Ensure divisible by 2
+    if target_h % 2 != 0: target_h -= 1
+
+    # Calculate width for 9:16
+    target_w = int(target_h * 9 / 16)
+    if target_w % 2 != 0: target_w -= 1
+
+    return target_w, target_h
+
 def get_center_bbox(bbox):
     # bbox: [x1, y1, x2, y2]
     return ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
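A quick sanity check of the new helper (a standalone sketch; the body is copied from the hunk above). Note that the width argument is currently unused, the canvas is derived from height alone:

def get_target_resolution(width, height):  # copied from the hunk above
    target_h = max(1920, height)
    if target_h % 2 != 0: target_h -= 1
    target_w = int(target_h * 9 / 16)
    if target_w % 2 != 0: target_w -= 1
    return target_w, target_h

print(get_target_resolution(1920, 1080))  # (1080, 1920): HD sources keep the standard portrait canvas
print(get_target_resolution(3840, 2160))  # (1214, 2160): 4K height kept; 2160 * 9/16 = 1215, floored to even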
@@ -107,9 +125,8 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
     # Target dimensions (9:16)
-    target_height = 1920
+    target_width, target_height = get_target_resolution(width, height)
+    print(f"Target Resolution: {target_width}x{target_height}")
 
     encoder_name, encoder_preset = get_best_encoder()
 

@@ -130,8 +147,8 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
 
     # If using hardware encoder, we might want to set bitrate to ensure quality
     if "nvenc" in encoder_name or "amf" in encoder_name:
-        ffmpeg_cmd.extend(["-b:v", "
+        ffmpeg_cmd.extend(["-b:v", "15M"])
+
     process = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)
 
     while True:

@@ -140,9 +157,9 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
             break
 
         if no_face_mode == "zoom":
-            result = crop_center_zoom(frame)
+            result = crop_center_zoom(frame, (target_width, target_height))
        else:
-            result = resize_with_padding(frame)
+            result = resize_with_padding(frame, (target_width, target_height))
 
         try:
             # Write raw bytes to ffmpeg stdin

@@ -172,7 +189,7 @@ def finalize_video(input_file, output_file, index, fps, project_folder, final_fo
         "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats",
         "-i", output_file,
         "-i", audio_file,
-        "-c:v", encoder_name, "-preset", encoder_preset, "-b:v", "
+        "-c:v", encoder_name, "-preset", encoder_preset, "-b:v", "15M",
         "-c:a", "aac", "-b:a", "192k",
         "-r", str(fps),
         final_output

@@ -234,8 +251,10 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
+    target_width, target_height = get_target_resolution(frame_width, frame_height)
+
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_file, fourcc, fps, (
+    out = cv2.VideoWriter(output_file, fourcc, fps, (target_width, target_height))
 
     next_detection_frame = 0
     current_interval = int(5 * fps) # Initial guess

@@ -335,9 +354,9 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
             current_faces = last_detected_faces
         else:
             if no_face_mode == "zoom":
-                result = crop_center_zoom(frame)
+                result = crop_center_zoom(frame, (target_width, target_height))
             else:
-                result = resize_with_padding(frame)
+                result = resize_with_padding(frame, (target_width, target_height))
             coordinate_log.append({"frame": frame_index, "faces": []})
             out.write(result)
             continue

@@ -345,18 +364,18 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
         last_frame_face_positions = current_faces
 
         if hasattr(current_faces, '__len__') and len(current_faces) == 2:
-            result = crop_and_resize_two_faces(frame, current_faces)
+            result = crop_and_resize_two_faces(frame, current_faces, target_size=(target_width, target_height))
         else:
             # Ensure it's list of tuples or single tuple? current_faces is list of tuples from detection
             # If 1 face: [ (x,y,w,h) ]
             if hasattr(current_faces, '__len__') and len(current_faces) > 0:
                 f = current_faces[0]
-                result = crop_and_resize_single_face(frame, f)
+                result = crop_and_resize_single_face(frame, f, target_size=(target_width, target_height))
             else:
                 if no_face_mode == "zoom":
-                    result = crop_center_zoom(frame)
+                    result = crop_center_zoom(frame, (target_width, target_height))
                 else:
-                    result = resize_with_padding(frame)
+                    result = resize_with_padding(frame, (target_width, target_height))
 
         out.write(result)

@@ -388,9 +407,13 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
 
     fps = cap.get(cv2.CAP_PROP_FPS)
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+    target_width, target_height = get_target_resolution(frame_width, frame_height)
 
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_file, fourcc, fps, (
+    out = cv2.VideoWriter(output_file, fourcc, fps, (target_width, target_height))
 
     # Logic copied from generate_short_mediapipe
     detection_interval = int(2 * fps) # Default check every 2 seconds

@@ -448,9 +471,9 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
         else:
             # No face detected for a while -> Center/Padding fallback
             if no_face_mode == "zoom":
-                result = crop_center_zoom(frame)
+                result = crop_center_zoom(frame, (target_width, target_height))
             else:
-                result = resize_with_padding(frame)
+                result = resize_with_padding(frame, (target_width, target_height))
             out.write(result)
             continue

@@ -462,7 +485,7 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
         else:
             face_bbox = current_faces # Should be handled
 
-        result = crop_and_resize_single_face(frame, face_bbox)
+        result = crop_and_resize_single_face(frame, face_bbox, target_size=(target_width, target_height))
         out.write(result)
 
     cap.release()

@@ -488,9 +511,12 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
+    target_width, target_height = get_target_resolution(frame_width, frame_height)
+    print(f"Target Resolution: {target_width}x{target_height}")
+
     # Using mp4v for container, but final mux will fix encoding
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_file, fourcc, fps, (
+    out = cv2.VideoWriter(output_file, fourcc, fps, (target_width, target_height))
 
     # Dynamic Interval Logic
     next_detection_frame = 0
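
Aside: the target size has to be threaded into every helper because cv2.VideoWriter only produces usable output when each written frame matches the (width, height) the writer was opened with; mismatches typically fail silently rather than raising. A minimal sketch of that invariant, with illustrative sizes:

import cv2
import numpy as np

w, h = 1080, 1920  # must match every frame written below
out = cv2.VideoWriter("demo.mp4", cv2.VideoWriter_fourcc(*'mp4v'), 30.0, (w, h))
frame = np.zeros((h, w, 3), dtype=np.uint8)  # NumPy shape is (height, width, channels)
out.write(frame)  # OK: frame size matches the writer's size
out.release()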
@@ -608,6 +634,22 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
                 is_talking = 1.0 if mar > active_speaker_mar else 0.0
 
 
+        # --- CROWD MODE LOGIC ---
+        # If too many faces, don't even try to track. Fallback to No-Face logic (Zoom/Padding)
+        CROWD_THRESHOLD = 7
+        # FIX: Use last_raw_faces (before size filtering) so we count background people too!
+        is_crowd = len(last_raw_faces) >= CROWD_THRESHOLD
+        if is_crowd:
+            print(f"DEBUG: Crowd Mode Active! {len(faces)} faces >= {CROWD_THRESHOLD}. Triggering Fallback (No Face Mode).")
+            faces = []
+            valid_faces = []  # CAUTION: Must clear strict backup too!
+            # FORCE RESET HISTORY so it doesn't "stick" to the last face found
+            last_detected_faces = None
+            transition_frames = []
+            faces_activity_state = []
+            zoom_ema_bbox = None  # Reset smoothing too
+        # ---------------------------
+
         # Update Activity State - Two Pass for Global Motion Compensation
         if focus_active_speaker and faces:
             # Pass 1: Global Motion (Camera Shake) Calculation

@@ -763,7 +805,8 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
         # -----------------------------
 
         # Fallback Lookahead: If detection fails or partial
-        if
+        # But DO NOT look ahead if we are in Crowd Mode (we explicitly wanted 0 faces)
+        if len(faces) < target_faces and not is_crowd:
             # Try 1 frame ahead
             ret2, frame2 = cap.read()
             if ret2 and frame2 is not None:

@@ -949,6 +992,12 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
             else:
                 result = resize_with_padding(frame)
             out.write(result)
+            timeline_frames.append((frame_index, "1"))  # Fix: Ensure fallback is treated as single face for subs
+
+            # Fix XML Log sync (Empty faces for fallback)
+            coords_entry = {"frame": frame_index, "src_size": [frame_width, frame_height], "faces": []}
+            coordinate_log.append(coords_entry)
+
             continue
 
         last_frame_face_positions = current_faces
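A standalone restatement of the crowd gate above, for clarity (the threshold and the use of raw, pre-filter detections mirror the diff; the face tuples are invented):

CROWD_THRESHOLD = 7

def is_crowd_shot(raw_faces):
    # Count raw detections (before size filtering) so background faces
    # that the tracker would ignore still push the scene into crowd mode.
    return len(raw_faces) >= CROWD_THRESHOLD

print(is_crowd_shot([(0, 0, 10, 10)] * 8))  # True -> fall back to zoom/padding
print(is_crowd_shot([(0, 0, 10, 10)] * 2))  # False -> normal face tracking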
scripts/one_face.py
CHANGED

@@ -4,15 +4,16 @@ import os
 import subprocess
 import mediapipe as mp
 
-def crop_and_resize_single_face(frame, face):
+def crop_and_resize_single_face(frame, face, target_size=(1080, 1920)):
     frame_height, frame_width = frame.shape[:2]
+    target_w, target_h = target_size
 
     x, y, w, h = face
     face_center_x = x + w // 2
     face_center_y = y + h // 2
 
     # Compute the desired aspect ratio
-    target_aspect_ratio =
+    target_aspect_ratio = target_w / target_h
 
     # Compute the crop area to avoid black bars
     if frame_width / frame_height > target_aspect_ratio:

@@ -28,15 +29,16 @@ def crop_and_resize_single_face(frame, face):
     crop_x2 = crop_x + new_width
     crop_y2 = crop_y + new_height
 
     # Crop and resize
     crop_img = frame[crop_y:crop_y2, crop_x:crop_x2]
-    resized = cv2.resize(crop_img,
+    resized = cv2.resize(crop_img, target_size, interpolation=cv2.INTER_AREA)
 
     return resized
 
-def resize_with_padding(frame):
+def resize_with_padding(frame, target_size=(1080, 1920)):
     frame_height, frame_width = frame.shape[:2]
+    target_w, target_h = target_size
+    target_aspect_ratio = target_w / target_h
 
     if frame_width / frame_height > target_aspect_ratio:
         new_width = frame_width

@@ -56,7 +58,7 @@ def resize_with_padding(frame):
     result[pad_top:pad_top+frame_height, pad_left:pad_left+frame_width] = frame
 
     # Resize to the final dimensions
-    return cv2.resize(result,
+    return cv2.resize(result, target_size, interpolation=cv2.INTER_AREA)
 
 def detect_face_or_body(frame, face_detection, face_mesh, pose):
     # Convert the image to RGB

@@ -108,12 +110,13 @@ def detect_face_or_body(frame, face_detection, face_mesh, pose):
     return detections if detections else None
 
 
-def crop_center_zoom(frame):
+def crop_center_zoom(frame, target_size=(1080, 1920)):
     """
-    Crops the center of the frame to fill
+    Crops the center of the frame to fill target ratio (Zoom effect).
     """
     frame_height, frame_width = frame.shape[:2]
+    target_w, target_h = target_size
+    target_aspect_ratio = target_w / target_h
 
     # Calculate crop dimensions to FILL the target ratio
     if frame_width / frame_height > target_aspect_ratio:

@@ -134,6 +137,6 @@ def crop_center_zoom(frame):
 
     crop_img = frame[start_y:start_y+new_height, start_x:start_x+new_width]
 
-    # Resize to final
-    return cv2.resize(crop_img,
+    # Resize to final dimensions
+    return cv2.resize(crop_img, target_size, interpolation=cv2.INTER_AREA)
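To make the crop geometry concrete: for a 1920x1080 landscape frame and the default 1080x1920 portrait target, the frame is wider than the target ratio, so the crop keeps the full height and takes a centered 9:16 slice of the width. A hand-worked sketch of that branch (variable names follow the functions above; the exact interior lines are truncated in this view):

frame_w, frame_h = 1920, 1080
target_w, target_h = 1080, 1920
target_aspect_ratio = target_w / target_h       # 0.5625, i.e. 9:16

new_height = frame_h                             # keep full height: 1080
new_width = int(frame_h * target_aspect_ratio)   # 607
start_x = (frame_w - new_width) // 2             # 656, centered horizontally
print(new_width, new_height, start_x)            # 607 1080 656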
scripts/two_face.py
CHANGED

@@ -78,18 +78,19 @@ def crop_and_maintain_ar(frame, face_box, target_w, target_h, zoom_out_factor=2.
     resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
     return resized
 
-def crop_and_resize_two_faces(frame, face_positions, zoom_out_factor=2.2):
+def crop_and_resize_two_faces(frame, face_positions, target_size=(1080, 1920), zoom_out_factor=2.2):
     """
     Crops and resizes two faces detected in the frame, arranging them into a vertical composition
+    where each face occupies half of the screen.
     """
     # Target dimensions for each half
+    final_w, final_h = target_size
+    target_w = final_w
+    target_h = final_h // 2
 
     # If we don't have 2 faces, fall back (safety)
     if len(face_positions) < 2:
-        return np.zeros((
+        return np.zeros((final_h, final_w, 3), dtype=np.uint8)
 
     # First face (top)
     face1_img = crop_and_maintain_ar(frame, face_positions[0], target_w, target_h, zoom_out_factor)
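
Each half is final_w wide and final_h // 2 tall, so the two crops stack vertically into the full canvas. The composition step falls outside this hunk; a plausible sketch of it, assuming NumPy vertical stacking and using dummy contents:

import numpy as np

final_w, final_h = 1080, 1920
half_h = final_h // 2

# Stand-ins for the two halves returned by crop_and_maintain_ar.
face1_img = np.zeros((half_h, final_w, 3), dtype=np.uint8)
face2_img = np.zeros((half_h, final_w, 3), dtype=np.uint8)

combined = np.vstack((face1_img, face2_img))
print(combined.shape)  # (1920, 1080, 3) -- fills the 9:16 canvas exactly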
|