RafaG committed on
Commit
1496c35
·
verified ·
1 Parent(s): 52b7b7a

Upload 24 files

Browse files
scripts/burn_subtitles.py CHANGED
@@ -20,7 +20,7 @@ def burn_video_file(video_path, subtitle_path, output_path):
20
  '-vf', f"subtitles='{subtitle_file_ffmpeg}'",
21
  '-c:v', encoder,
22
  '-preset', preset,
23
- '-b:v', '15M',
24
  '-pix_fmt', 'yuv420p',
25
  '-c:a', 'copy',
26
  output_path
 
20
  '-vf', f"subtitles='{subtitle_file_ffmpeg}'",
21
  '-c:v', encoder,
22
  '-preset', preset,
23
+ '-b:v', '5M',
24
  '-pix_fmt', 'yuv420p',
25
  '-c:a', 'copy',
26
  output_path
scripts/edit_video.py CHANGED
@@ -54,24 +54,6 @@ def get_best_encoder():
54
  CACHED_ENCODER = ("libx264", "ultrafast")
55
  return CACHED_ENCODER
56
 
57
- def get_target_resolution(width, height):
58
- """
59
- Calculate target 9:16 resolution based on input size.
60
- Preserves 4K height if available.
61
- """
62
- # Use max of 1920 or input height to avoid downscaling 4K content
63
- # If input is 4K (H=2160), use 2160.
64
- target_h = max(1920, height)
65
-
66
- # Ensure divisible by 2
67
- if target_h % 2 != 0: target_h -= 1
68
-
69
- # Calculate width for 9:16
70
- target_w = int(target_h * 9 / 16)
71
- if target_w % 2 != 0: target_w -= 1
72
-
73
- return target_w, target_h
74
-
75
  def get_center_bbox(bbox):
76
  # bbox: [x1, y1, x2, y2]
77
  return ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
@@ -125,8 +107,9 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
125
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
126
 
127
  # Target dimensions (9:16)
128
- target_width, target_height = get_target_resolution(width, height)
129
- print(f"Target Resolution: {target_width}x{target_height}")
 
130
 
131
  encoder_name, encoder_preset = get_best_encoder()
132
 
@@ -147,8 +130,8 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
147
 
148
  # If using hardware encoder, we might want to set bitrate to ensure quality
149
  if "nvenc" in encoder_name or "amf" in encoder_name:
150
- ffmpeg_cmd.extend(["-b:v", "15M"])
151
-
152
  process = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)
153
 
154
  while True:
@@ -157,9 +140,9 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
157
  break
158
 
159
  if no_face_mode == "zoom":
160
- result = crop_center_zoom(frame, (target_width, target_height))
161
  else:
162
- result = resize_with_padding(frame, (target_width, target_height))
163
 
164
  try:
165
  # Write raw bytes to ffmpeg stdin
@@ -189,7 +172,7 @@ def finalize_video(input_file, output_file, index, fps, project_folder, final_fo
189
  "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats",
190
  "-i", output_file,
191
  "-i", audio_file,
192
- "-c:v", encoder_name, "-preset", encoder_preset, "-b:v", "15M",
193
  "-c:a", "aac", "-b:a", "192k",
194
  "-r", str(fps),
195
  final_output
@@ -251,10 +234,8 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
251
  frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
252
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
253
 
254
- target_width, target_height = get_target_resolution(frame_width, frame_height)
255
-
256
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
257
- out = cv2.VideoWriter(output_file, fourcc, fps, (target_width, target_height))
258
 
259
  next_detection_frame = 0
260
  current_interval = int(5 * fps) # Initial guess
@@ -354,9 +335,9 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
354
  current_faces = last_detected_faces
355
  else:
356
  if no_face_mode == "zoom":
357
- result = crop_center_zoom(frame, (target_width, target_height))
358
  else:
359
- result = resize_with_padding(frame, (target_width, target_height))
360
  coordinate_log.append({"frame": frame_index, "faces": []})
361
  out.write(result)
362
  continue
@@ -364,18 +345,18 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
364
  last_frame_face_positions = current_faces
365
 
366
  if hasattr(current_faces, '__len__') and len(current_faces) == 2:
367
- result = crop_and_resize_two_faces(frame, current_faces, target_size=(target_width, target_height))
368
  else:
369
  # Ensure it's list of tuples or single tuple? current_faces is list of tuples from detection
370
  # If 1 face: [ (x,y,w,h) ]
371
  if hasattr(current_faces, '__len__') and len(current_faces) > 0:
372
  f = current_faces[0]
373
- result = crop_and_resize_single_face(frame, f, target_size=(target_width, target_height))
374
  else:
375
  if no_face_mode == "zoom":
376
- result = crop_center_zoom(frame, (target_width, target_height))
377
  else:
378
- result = resize_with_padding(frame, (target_width, target_height))
379
 
380
  out.write(result)
381
 
@@ -407,13 +388,9 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
407
 
408
  fps = cap.get(cv2.CAP_PROP_FPS)
409
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
410
- frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
411
- frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
412
-
413
- target_width, target_height = get_target_resolution(frame_width, frame_height)
414
 
415
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
416
- out = cv2.VideoWriter(output_file, fourcc, fps, (target_width, target_height))
417
 
418
  # Logic copied from generate_short_mediapipe
419
  detection_interval = int(2 * fps) # Default check every 2 seconds
@@ -471,9 +448,9 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
471
  else:
472
  # No face detected for a while -> Center/Padding fallback
473
  if no_face_mode == "zoom":
474
- result = crop_center_zoom(frame, (target_width, target_height))
475
  else:
476
- result = resize_with_padding(frame, (target_width, target_height))
477
  out.write(result)
478
  continue
479
 
@@ -485,7 +462,7 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
485
  else:
486
  face_bbox = current_faces # Should be handled
487
 
488
- result = crop_and_resize_single_face(frame, face_bbox, target_size=(target_width, target_height))
489
  out.write(result)
490
 
491
  cap.release()
@@ -511,12 +488,9 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
511
  frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
512
  frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
513
 
514
- target_width, target_height = get_target_resolution(frame_width, frame_height)
515
- print(f"Target Resolution: {target_width}x{target_height}")
516
-
517
  # Using mp4v for container, but final mux will fix encoding
518
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
519
- out = cv2.VideoWriter(output_file, fourcc, fps, (target_width, target_height))
520
 
521
  # Dynamic Interval Logic
522
  next_detection_frame = 0
 
54
  CACHED_ENCODER = ("libx264", "ultrafast")
55
  return CACHED_ENCODER
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  def get_center_bbox(bbox):
58
  # bbox: [x1, y1, x2, y2]
59
  return ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
 
107
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
108
 
109
  # Target dimensions (9:16)
110
+
111
+ target_width = 1080
112
+ target_height = 1920
113
 
114
  encoder_name, encoder_preset = get_best_encoder()
115
 
 
130
 
131
  # If using hardware encoder, we might want to set bitrate to ensure quality
132
  if "nvenc" in encoder_name or "amf" in encoder_name:
133
+ ffmpeg_cmd.extend(["-b:v", "5M"])
134
+
135
  process = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)
136
 
137
  while True:
 
140
  break
141
 
142
  if no_face_mode == "zoom":
143
+ result = crop_center_zoom(frame)
144
  else:
145
+ result = resize_with_padding(frame)
146
 
147
  try:
148
  # Write raw bytes to ffmpeg stdin
 
172
  "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats",
173
  "-i", output_file,
174
  "-i", audio_file,
175
+ "-c:v", encoder_name, "-preset", encoder_preset, "-b:v", "5M",
176
  "-c:a", "aac", "-b:a", "192k",
177
  "-r", str(fps),
178
  final_output
 
234
  frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
235
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
236
 
 
 
237
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
238
+ out = cv2.VideoWriter(output_file, fourcc, fps, (1080, 1920))
239
 
240
  next_detection_frame = 0
241
  current_interval = int(5 * fps) # Initial guess
 
335
  current_faces = last_detected_faces
336
  else:
337
  if no_face_mode == "zoom":
338
+ result = crop_center_zoom(frame)
339
  else:
340
+ result = resize_with_padding(frame)
341
  coordinate_log.append({"frame": frame_index, "faces": []})
342
  out.write(result)
343
  continue
 
345
  last_frame_face_positions = current_faces
346
 
347
  if hasattr(current_faces, '__len__') and len(current_faces) == 2:
348
+ result = crop_and_resize_two_faces(frame, current_faces)
349
  else:
350
  # Ensure it's list of tuples or single tuple? current_faces is list of tuples from detection
351
  # If 1 face: [ (x,y,w,h) ]
352
  if hasattr(current_faces, '__len__') and len(current_faces) > 0:
353
  f = current_faces[0]
354
+ result = crop_and_resize_single_face(frame, f)
355
  else:
356
  if no_face_mode == "zoom":
357
+ result = crop_center_zoom(frame)
358
  else:
359
+ result = resize_with_padding(frame)
360
 
361
  out.write(result)
362
 
 
388
 
389
  fps = cap.get(cv2.CAP_PROP_FPS)
390
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
 
 
 
391
 
392
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
393
+ out = cv2.VideoWriter(output_file, fourcc, fps, (1080, 1920))
394
 
395
  # Logic copied from generate_short_mediapipe
396
  detection_interval = int(2 * fps) # Default check every 2 seconds
 
448
  else:
449
  # No face detected for a while -> Center/Padding fallback
450
  if no_face_mode == "zoom":
451
+ result = crop_center_zoom(frame)
452
  else:
453
+ result = resize_with_padding(frame)
454
  out.write(result)
455
  continue
456
 
 
462
  else:
463
  face_bbox = current_faces # Should be handled
464
 
465
+ result = crop_and_resize_single_face(frame, face_bbox)
466
  out.write(result)
467
 
468
  cap.release()
 
488
  frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
489
  frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
490
 
 
 
 
491
  # Using mp4v for container, but final mux will fix encoding
492
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
493
+ out = cv2.VideoWriter(output_file, fourcc, fps, (1080, 1920))
494
 
495
  # Dynamic Interval Logic
496
  next_detection_frame = 0
scripts/one_face.py CHANGED
@@ -4,16 +4,15 @@ import os
4
  import subprocess
5
  import mediapipe as mp
6
 
7
- def crop_and_resize_single_face(frame, face, target_size=(1080, 1920)):
8
  frame_height, frame_width = frame.shape[:2]
9
- target_w, target_h = target_size
10
 
11
  x, y, w, h = face
12
  face_center_x = x + w // 2
13
  face_center_y = y + h // 2
14
 
15
- # Cálculo da proporção desejada
16
- target_aspect_ratio = target_w / target_h
17
 
18
  # Cálculo da área de corte para evitar barras pretas
19
  if frame_width / frame_height > target_aspect_ratio:
@@ -29,16 +28,15 @@ def crop_and_resize_single_face(frame, face, target_size=(1080, 1920)):
29
  crop_x2 = crop_x + new_width
30
  crop_y2 = crop_y + new_height
31
 
32
- # Recorte e redimensionamento
33
  crop_img = frame[crop_y:crop_y2, crop_x:crop_x2]
34
- resized = cv2.resize(crop_img, target_size, interpolation=cv2.INTER_AREA)
35
 
36
  return resized
37
 
38
- def resize_with_padding(frame, target_size=(1080, 1920)):
39
  frame_height, frame_width = frame.shape[:2]
40
- target_w, target_h = target_size
41
- target_aspect_ratio = target_w / target_h
42
 
43
  if frame_width / frame_height > target_aspect_ratio:
44
  new_width = frame_width
@@ -58,7 +56,7 @@ def resize_with_padding(frame, target_size=(1080, 1920)):
58
  result[pad_top:pad_top+frame_height, pad_left:pad_left+frame_width] = frame
59
 
60
  # Redimensionar para as dimensões finais
61
- return cv2.resize(result, target_size, interpolation=cv2.INTER_AREA)
62
 
63
  def detect_face_or_body(frame, face_detection, face_mesh, pose):
64
  # Converter a imagem para RGB
@@ -110,13 +108,12 @@ def detect_face_or_body(frame, face_detection, face_mesh, pose):
110
  return detections if detections else None
111
 
112
 
113
- def crop_center_zoom(frame, target_size=(1080, 1920)):
114
  """
115
- Crops the center of the frame to fill target ratio (Zoom effect).
116
  """
117
  frame_height, frame_width = frame.shape[:2]
118
- target_w, target_h = target_size
119
- target_aspect_ratio = target_w / target_h
120
 
121
  # Calculate crop dimensions to FILL the target ratio
122
  if frame_width / frame_height > target_aspect_ratio:
@@ -137,6 +134,6 @@ def crop_center_zoom(frame, target_size=(1080, 1920)):
137
 
138
  crop_img = frame[start_y:start_y+new_height, start_x:start_x+new_width]
139
 
140
- # Resize to final dimensions
141
- return cv2.resize(crop_img, target_size, interpolation=cv2.INTER_AREA)
142
 
 
4
  import subprocess
5
  import mediapipe as mp
6
 
7
+ def crop_and_resize_single_face(frame, face):
8
  frame_height, frame_width = frame.shape[:2]
 
9
 
10
  x, y, w, h = face
11
  face_center_x = x + w // 2
12
  face_center_y = y + h // 2
13
 
14
+ # Cálculo da proporção desejada (9:16)
15
+ target_aspect_ratio = 9 / 16
16
 
17
  # Cálculo da área de corte para evitar barras pretas
18
  if frame_width / frame_height > target_aspect_ratio:
 
28
  crop_x2 = crop_x + new_width
29
  crop_y2 = crop_y + new_height
30
 
31
+ # Recorte e redimensionamento para 1080x1920 (9:16)
32
  crop_img = frame[crop_y:crop_y2, crop_x:crop_x2]
33
+ resized = cv2.resize(crop_img, (1080, 1920), interpolation=cv2.INTER_AREA)
34
 
35
  return resized
36
 
37
+ def resize_with_padding(frame):
38
  frame_height, frame_width = frame.shape[:2]
39
+ target_aspect_ratio = 9 / 16
 
40
 
41
  if frame_width / frame_height > target_aspect_ratio:
42
  new_width = frame_width
 
56
  result[pad_top:pad_top+frame_height, pad_left:pad_left+frame_width] = frame
57
 
58
  # Redimensionar para as dimensões finais
59
+ return cv2.resize(result, (1080, 1920), interpolation=cv2.INTER_AREA)
60
 
61
  def detect_face_or_body(frame, face_detection, face_mesh, pose):
62
  # Converter a imagem para RGB
 
108
  return detections if detections else None
109
 
110
 
111
+ def crop_center_zoom(frame):
112
  """
113
+ Crops the center of the frame to fill 9:16 aspect ratio (Zoom effect).
114
  """
115
  frame_height, frame_width = frame.shape[:2]
116
+ target_aspect_ratio = 9 / 16
 
117
 
118
  # Calculate crop dimensions to FILL the target ratio
119
  if frame_width / frame_height > target_aspect_ratio:
 
134
 
135
  crop_img = frame[start_y:start_y+new_height, start_x:start_x+new_width]
136
 
137
+ # Resize to final 1080x1920
138
+ return cv2.resize(crop_img, (1080, 1920), interpolation=cv2.INTER_AREA)
139
 
scripts/two_face.py CHANGED
@@ -78,19 +78,18 @@ def crop_and_maintain_ar(frame, face_box, target_w, target_h, zoom_out_factor=2.
78
  resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
79
  return resized
80
 
81
- def crop_and_resize_two_faces(frame, face_positions, target_size=(1080, 1920), zoom_out_factor=2.2):
82
  """
83
  Recorta e redimensiona dois rostos detectados no frame, ajustando para uma composição vertical
84
- onde cada rosto ocupa metade da tela.
85
  """
86
  # Target dimensoes para cada metade
87
- final_w, final_h = target_size
88
- target_w = final_w
89
- target_h = final_h // 2
90
 
91
  # Se não temos 2 faces, fallback (segurança)
92
  if len(face_positions) < 2:
93
- return np.zeros((final_h, final_w, 3), dtype=np.uint8)
94
 
95
  # Primeiro rosto (Topo)
96
  face1_img = crop_and_maintain_ar(frame, face_positions[0], target_w, target_h, zoom_out_factor)
 
78
  resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
79
  return resized
80
 
81
+ def crop_and_resize_two_faces(frame, face_positions, zoom_out_factor=2.2):
82
  """
83
  Recorta e redimensiona dois rostos detectados no frame, ajustando para uma composição vertical
84
+ 1080x1920 onde cada rosto ocupa metade da tela (1080x960).
85
  """
86
  # Target dimensoes para cada metade
87
+ target_w = 1080
88
+ target_h = 960
 
89
 
90
  # Se não temos 2 faces, fallback (segurança)
91
  if len(face_positions) < 2:
92
+ return np.zeros((1920, 1080, 3), dtype=np.uint8)
93
 
94
  # Primeiro rosto (Topo)
95
  face1_img = crop_and_maintain_ar(frame, face_positions[0], target_w, target_h, zoom_out_factor)