leicam committed on
Commit
48983c6
·
verified ·
1 Parent(s): 09c1b14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +203 -218
app.py CHANGED
@@ -6,16 +6,15 @@ Transcrição + Cortes + Face Tracking
6
  import gradio as gr
7
  import cv2
8
  import numpy as np
 
9
  import whisper
10
  import subprocess
11
  from pathlib import Path
12
  from dataclasses import dataclass
13
- from typing import List, Tuple, Optional, Union
14
  import tempfile
15
  import os
16
  import shutil
17
- import json
18
- import random
19
 
20
  # ======================= DATACLASSES =======================
21
 
@@ -40,49 +39,6 @@ class FaceBox:
40
  center_y: int
41
  confidence: float = 1.0
42
 
43
- # ======================= UTILS =======================
44
-
45
- def resolve_video_path(v: Union[str, dict, None]) -> Optional[str]:
46
- """Gradio pode entregar str (caminho) ou dict. Normaliza para caminho local."""
47
- if v is None:
48
- return None
49
- if isinstance(v, str):
50
- return v
51
- if isinstance(v, dict):
52
- if "name" in v and isinstance(v["name"], str) and os.path.exists(v["name"]):
53
- return v["name"]
54
- if "path" in v and isinstance(v["path"], str) and os.path.exists(v["path"]):
55
- return v["path"]
56
- return v.get("name") or v.get("path")
57
- return None
58
-
59
- def probe_duration(path: str) -> Optional[float]:
60
- """Retorna a duração (s) via ffprobe."""
61
- try:
62
- cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "json", path]
63
- out = subprocess.run(cmd, check=True, capture_output=True)
64
- data = json.loads(out.stdout.decode("utf-8", errors="ignore"))
65
- dur = float(data.get("format", {}).get("duration", 0.0))
66
- return dur if dur > 0 else None
67
- except Exception as e:
68
- print(f"[ffprobe] falhou: {e}")
69
- return None
70
-
71
- def remux_video(src: str) -> str:
72
- """Gera um MP4 remuxado (ajusta PTS/timebase e faststart)."""
73
- fd, tmp_path = tempfile.mkstemp(suffix=".mp4")
74
- os.close(fd)
75
- cmd = [
76
- "ffmpeg", "-y",
77
- "-fflags", "+genpts",
78
- "-i", src,
79
- "-c", "copy",
80
- "-movflags", "+faststart",
81
- tmp_path
82
- ]
83
- subprocess.run(cmd, check=True, capture_output=True)
84
- return tmp_path
85
-
86
  # ======================= FACE TRACKING =======================
87
 
88
  class FaceTracker:
@@ -105,9 +61,9 @@ class FaceTracker:
105
 
106
  self.enabled = self.face_cascade is not None and not self.face_cascade.empty()
107
  if self.enabled:
108
- print("Detector de rostos carregado")
109
  else:
110
- print("Detector de rostos não disponível - usando crop centralizado")
111
 
112
  def detect_faces(self, frame: np.ndarray) -> List[FaceBox]:
113
  if not self.enabled:
@@ -186,147 +142,69 @@ class FaceTracker:
186
 
187
  return (crop_x, crop_y, crop_w, crop_h)
188
 
189
- # ======================= TRANSCRIÇÃO (ROBUSTA) =======================
190
-
191
- def extract_audio_wav_strong(input_video: str, sr: int = 16000) -> str:
192
- """
193
- Extração de áudio à prova de VFR/PTS ruins.
194
- 1) Remuxa o vídeo (ajusta timebase)
195
- 2) Extrai WAV mono 16k
196
- 3) Se o WAV vier curto, faz fallback re-decodificando o original
197
- """
198
- vid_dur = probe_duration(input_video)
199
- print(f"[probe] video: {vid_dur:.2f}s" if vid_dur else "[probe] video: ?")
200
-
201
- remux = remux_video(input_video)
202
- print(f"[remux] -> {remux}")
203
-
204
- fd, wav_path = tempfile.mkstemp(suffix=".wav")
205
- os.close(fd)
206
-
207
- # Tentativa 1 — do remux, convertendo para PCM
208
- cmd1 = [
209
- "ffmpeg", "-y",
210
- "-i", remux,
211
- "-vn",
212
- "-map", "0:a:0?",
213
- "-ac", "1", "-ar", str(sr),
214
- "-c:a", "pcm_s16le",
215
- wav_path
216
- ]
217
- subprocess.run(cmd1, check=True, capture_output=True)
218
- wav_dur = probe_duration(wav_path)
219
- print(f"[probe] wav #1: {wav_dur:.2f}s" if wav_dur else "[probe] wav #1: ?")
220
-
221
- # Fallback — redecodifica direto do original
222
- if vid_dur and (not wav_dur or wav_dur + 2 < vid_dur):
223
- print("[fallback] re-decodificando o arquivo original…")
224
- fd2, wav2 = tempfile.mkstemp(suffix=".wav")
225
- os.close(fd2)
226
- cmd2 = [
227
- "ffmpeg", "-y",
228
- "-fflags", "+genpts",
229
- "-i", input_video,
230
- "-vn",
231
- "-ac", "1", "-ar", str(sr),
232
- "-c:a", "pcm_s16le",
233
- wav2
234
- ]
235
- subprocess.run(cmd2, check=True, capture_output=True)
236
- wav2_dur = probe_duration(wav2)
237
- print(f"[probe] wav #2: {wav2_dur:.2f}s" if wav2_dur else "[probe] wav #2: ?")
238
- if wav2_dur and (not wav_dur or wav2_dur > wav_dur):
239
- try:
240
- Path(wav_path).unlink(missing_ok=True)
241
- Path(remux).unlink(missing_ok=True)
242
- except Exception:
243
- pass
244
- return wav2
245
-
246
- try:
247
- Path(remux).unlink(missing_ok=True)
248
- except Exception:
249
- pass
250
- return wav_path
251
 
252
  def transcribe(video_file: str, model_size: str = "small") -> List[Segment]:
253
- print(f"[whisper] modelo: {model_size}")
254
  model = whisper.load_model(model_size)
255
-
256
- print("[audio] extraindo WAV robusto…")
257
- audio_wav = extract_audio_wav_strong(video_file, sr=16000)
258
-
259
- vid_dur = probe_duration(video_file)
260
- wav_dur = probe_duration(audio_wav)
261
- if vid_dur: print(f"[dur] vídeo: {vid_dur:.2f}s")
262
- if wav_dur: print(f"[dur] wav: {wav_dur:.2f}s")
263
-
264
- print("[whisper] transcrevendo…")
265
- result = model.transcribe(
266
- audio_wav,
267
- language="pt",
268
- verbose=False,
269
- task="transcribe",
270
- temperature=0,
271
- condition_on_previous_text=False,
272
- fp16=False
273
- )
274
-
275
- segments = [Segment(start=s["start"], end=s["end"], text=s["text"].strip())
276
- for s in result.get("segments", [])]
277
- print(f"[whisper] segmentos: {len(segments)}")
278
-
279
- try:
280
- Path(audio_wav).unlink(missing_ok=True)
281
- except Exception:
282
- pass
283
  return segments
284
 
285
  # ======================= PROCESSAMENTO DE VÍDEO =======================
286
 
287
  def extract_video_segment(input_video: str, output_video: str, start_time: float, end_time: float) -> bool:
288
- duration = max(0.0, end_time - start_time)
289
- if duration <= 0:
290
- print(f"[extract] duração inválida: {duration}")
291
- return False
292
  cmd = [
293
  "ffmpeg", "-y", "-ss", str(start_time), "-i", input_video,
294
- "-t", str(duration),
295
- "-c:v", "libx264",
296
- "-c:a", "aac",
297
- "-movflags", "+faststart",
298
- output_video
299
  ]
 
300
  try:
301
  subprocess.run(cmd, check=True, capture_output=True)
302
  return True
303
  except subprocess.CalledProcessError as e:
304
- print(f"[extract] erro: {e}")
305
  return False
306
 
307
  def apply_smart_crop_to_video(input_path: str, output_path: str, target_width: int,
308
  target_height: int, sample_frames: int = 10) -> bool:
309
- """Calcula o melhor crop com rastreamento facial e aplica com FFmpeg preservando áudio."""
310
  tracker = FaceTracker()
311
  cap = cv2.VideoCapture(input_path)
 
312
  if not cap.isOpened():
313
- print(f"[crop] erro ao abrir: {input_path}")
314
  return False
315
 
 
316
  frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
317
  frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
318
  frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
319
 
 
320
  sample_positions = []
321
- frame_indices = np.linspace(0, frame_count - 1, min(sample_frames, max(1, frame_count)), dtype=int)
 
322
  for idx in frame_indices:
323
  cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
324
  ret, frame = cap.read()
325
  if ret:
326
  crop_coords = tracker.calculate_smart_crop(frame, target_width, target_height)
327
  sample_positions.append(crop_coords)
328
- cap.release()
329
 
 
330
  if sample_positions:
331
  avg_x = int(np.median([p[0] for p in sample_positions]))
332
  avg_y = int(np.median([p[1] for p in sample_positions]))
@@ -334,6 +212,7 @@ def apply_smart_crop_to_video(input_path: str, output_path: str, target_width: i
334
  crop_h = sample_positions[0][3]
335
  final_crop = (avg_x, avg_y, crop_w, crop_h)
336
  else:
 
337
  target_ar = target_width / target_height
338
  frame_ar = frame_w / frame_h
339
  if target_ar < frame_ar:
@@ -345,25 +224,39 @@ def apply_smart_crop_to_video(input_path: str, output_path: str, target_width: i
345
  crop_h = int(frame_w / target_ar)
346
  final_crop = (0, (frame_h - crop_h) // 2, crop_w, crop_h)
347
 
348
- x, y, w, h = final_crop
349
- print(f"[crop] final: x={x}, y={y}, w={w}, h={h} -> {target_width}x{target_height}")
350
 
351
- vf = f"crop={w}:{h}:{x}:{y},scale={target_width}:{target_height}:flags=lanczos"
352
- cmd = [
353
- "ffmpeg", "-y", "-i", input_path,
354
- "-vf", vf,
355
- "-c:v", "libx264", "-preset", "veryfast", "-crf", "18",
356
- "-c:a", "copy",
357
- "-movflags", "+faststart",
358
- output_path
359
- ]
360
- try:
361
- subprocess.run(cmd, check=True, capture_output=True)
362
- print(f"[crop] concluído: {output_path}")
363
- return True
364
- except subprocess.CalledProcessError as e:
365
- print(f"[crop] erro ffmpeg: {e}")
366
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
  def apply_aspect_ratio(input_video: str, output_video: str, ar_mode: str, face_tracking: bool = False) -> bool:
369
  if ar_mode == "Original":
@@ -375,20 +268,20 @@ def apply_aspect_ratio(input_video: str, output_video: str, ar_mode: str, face_t
375
  "Quadrado 1:1": (1080, 1080),
376
  "Retrato 4:5": (1080, 1350),
377
  }
 
378
  if ar_mode not in ar_dims:
379
  return False
380
 
381
  width, height = ar_dims[ar_mode]
 
382
  if face_tracking:
383
  return apply_smart_crop_to_video(input_video, output_video, width, height)
384
  else:
 
385
  cmd = [
386
  "ffmpeg", "-y", "-i", input_video,
387
  "-vf", f"scale={width}:{height}:force_original_aspect_ratio=increase,crop={width}:{height}",
388
- "-c:v", "libx264", "-preset", "veryfast", "-crf", "18",
389
- "-c:a", "copy",
390
- "-movflags", "+faststart",
391
- output_video
392
  ]
393
  try:
394
  subprocess.run(cmd, check=True, capture_output=True)
@@ -406,21 +299,117 @@ def concatenate_videos(video_files: List[str], output_file: str) -> bool:
406
  f.write(f"file '{os.path.abspath(vf)}'\n")
407
 
408
  try:
409
- cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", list_file, "-c", "copy", "-movflags", "+faststart", output_file]
410
  subprocess.run(cmd, check=True, capture_output=True)
411
  return True
412
  except subprocess.CalledProcessError:
413
- try:
414
- cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", list_file,
415
- "-c:v", "libx264", "-preset", "veryfast", "-crf", "18",
416
- "-c:a", "aac", "-movflags", "+faststart", output_file]
417
- subprocess.run(cmd, check=True, capture_output=True)
418
- return True
419
- except subprocess.CalledProcessError:
420
- return False
421
  finally:
422
  Path(list_file).unlink(missing_ok=True)
423
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
  # ======================= GERAÇÃO DE CORTES =======================
425
 
426
  def generate_linear_cuts(video_file: str, segments: List[Segment], output_dir: str,
@@ -432,12 +421,12 @@ def generate_linear_cuts(video_file: str, segments: List[Segment], output_dir: s
432
 
433
  Path(output_dir).mkdir(parents=True, exist_ok=True)
434
  total_duration = segments[-1].end - segments[0].start
435
- target_duration = min(max_len, max(min_len, total_duration / max(1, int(k))))
436
 
437
  outputs = []
438
  current_start = segments[0].start
439
 
440
- for i in range(int(k)):
441
  target_end = current_start + target_duration
442
  best_end = target_end
443
 
@@ -455,16 +444,15 @@ def generate_linear_cuts(video_file: str, segments: List[Segment], output_dir: s
455
  temp_file = Path(output_dir) / f"temp_linear_{i+1}.mp4"
456
  final_file = Path(output_dir) / f"cut_linear_{i+1}.mp4"
457
 
458
- print(f"[linear] corte {i+1}/{k}: {start_with_pad:.1f}s - {end_with_pad:.1f}s")
459
 
460
- src_path = resolve_video_path(video_file) or video_file
461
- if extract_video_segment(src_path, str(temp_file), start_with_pad, end_with_pad):
462
  if ar_mode != "Original":
463
  if apply_aspect_ratio(str(temp_file), str(final_file), ar_mode, face_tracking):
464
- Path(temp_file).unlink(missing_ok=True)
465
  outputs.append(str(final_file))
466
  else:
467
- Path(temp_file).rename(final_file)
468
  outputs.append(str(final_file))
469
 
470
  current_start = best_end + gap_threshold
@@ -484,7 +472,8 @@ def generate_creative_cuts(video_file: str, segments: List[Segment], output_dir:
484
  Path(output_dir).mkdir(parents=True, exist_ok=True)
485
  outputs = []
486
 
487
- for i in range(int(k)):
 
488
  num_blocks = random.randint(min_blocks, min(max_blocks, len(segments)))
489
  step = max(1, len(segments) // num_blocks)
490
  selected_indices = [j * step for j in range(num_blocks)]
@@ -495,8 +484,8 @@ def generate_creative_cuts(video_file: str, segments: List[Segment], output_dir:
495
  block_file = Path(output_dir) / f"temp_creative_{i+1}_block_{j+1}.mp4"
496
  start = max(0, seg.start - pad)
497
  end = seg.end + pad
498
- src_path = resolve_video_path(video_file) or video_file
499
- if extract_video_segment(src_path, str(block_file), start, end):
500
  block_files.append(str(block_file))
501
 
502
  if not block_files:
@@ -508,10 +497,10 @@ def generate_creative_cuts(video_file: str, segments: List[Segment], output_dir:
508
 
509
  if ar_mode != "Original":
510
  if apply_aspect_ratio(str(concat_file), str(final_file), ar_mode, face_tracking):
511
- Path(concat_file).unlink(missing_ok=True)
512
  outputs.append(str(final_file))
513
  else:
514
- Path(concat_file).rename(final_file)
515
  outputs.append(str(final_file))
516
 
517
  for bf in block_files:
@@ -525,10 +514,9 @@ SPACE_OUT = Path("outputs")
525
  SPACE_OUT.mkdir(exist_ok=True, parents=True)
526
 
527
  def do_transcribe(video_file, model_size):
528
- true_path = resolve_video_path(video_file)
529
- if not true_path or not os.path.exists(true_path):
530
- return [], "Selecione um vídeo válido."
531
- segs = transcribe(true_path, model_size=model_size)
532
  preview = "\n".join([f"[{s.start:.1f}–{s.end:.1f}] {s.text}" for s in segs[:12]])
533
  return segs, f"Transcrição ok. Segmentos: {len(segs)}\n\nPrévia:\n{preview}"
534
 
@@ -536,21 +524,19 @@ def run_linear(segs, video_file, out_subdir, min_len, max_len, ideal_len, k, gap
536
  if not segs:
537
  return [], "Transcreva antes de cortar."
538
  workdir = SPACE_OUT / (out_subdir or "cortes")
539
- outs = generate_linear_cuts(video_file, segs, str(workdir),
540
- min_len=float(min_len), max_len=float(max_len), ideal_len=float(ideal_len),
541
- k=int(k), gap_threshold=float(gap), pad=float(pad),
542
- ar_mode=str(ar_mode), face_tracking=bool(face_tracking))
543
  return [str(Path(p)) for p in outs], f"Gerados: {len(outs)} arquivo(s)."
544
 
545
  def run_creative(segs, video_file, out_subdir, min_len, max_len, ideal_len, minb, maxb, k, gap, pad, ar_mode, face_tracking):
546
  if not segs:
547
  return [], "Transcreva antes de cortar."
548
  workdir = SPACE_OUT / (out_subdir or "cortes")
549
- outs = generate_creative_cuts(video_file, segs, str(workdir),
550
- min_len=float(min_len), max_len=float(max_len), ideal_len=float(ideal_len),
551
- min_blocks=int(minb), max_blocks=int(maxb), k=int(k),
552
- gap_threshold=float(gap), pad=float(pad),
553
- ar_mode=str(ar_mode), face_tracking=bool(face_tracking))
554
  return [str(Path(p)) for p in outs], f"Gerados: {len(outs)} arquivo(s)."
555
 
556
  css = """
@@ -574,7 +560,7 @@ with gr.Blocks(title="Editor de Cortes Automático", css=css) as demo:
574
  gr.HTML("""
575
  <link href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;800&display=swap" rel="stylesheet">
576
  <div style="text-align: center; padding: 24px 0;">
577
- <h1>Editor de Cortes Automático</h1>
578
  <p style="color: #6b7280;">Gere cortes com rastreamento facial inteligente</p>
579
  </div>
580
  """)
@@ -585,11 +571,11 @@ with gr.Blocks(title="Editor de Cortes Automático", css=css) as demo:
585
  with gr.Row():
586
  model_size = gr.Dropdown(["tiny","base","small","medium"], value="small", label="Modelo Whisper")
587
  out_subdir = gr.Textbox(label="Pasta de saída", value="cortes")
588
- transcribe_btn = gr.Button("1) Transcrever", variant="primary")
589
  transcript_preview = gr.Textbox(label="Status", lines=10)
590
 
591
  with gr.Column():
592
- with gr.Tab("Cortes Simples"):
593
  with gr.Row():
594
  min_len = gr.Number(value=600, label="Min (s)")
595
  max_len = gr.Number(value=900, label="Max (s)")
@@ -601,12 +587,12 @@ with gr.Blocks(title="Editor de Cortes Automático", css=css) as demo:
601
  pad = gr.Number(value=0.08, label="Pad")
602
  ar_mode = gr.Dropdown(["Original","Vertical 9:16","Quadrado 1:1","Retrato 4:5"],
603
  value="Original", label="Formato")
604
- face_tracking = gr.Checkbox(label="Rastreamento facial", value=True)
605
- go_linear = gr.Button("2) Gerar Cortes", variant="primary")
606
  out_linear = gr.Files(label="Arquivos gerados")
607
  status_linear = gr.Textbox(label="Status", lines=2)
608
 
609
- with gr.Tab("Cortes Criativos"):
610
  with gr.Row():
611
  minb = gr.Number(value=3, label="Blocos min")
612
  maxb = gr.Number(value=8, label="Blocos max")
@@ -616,8 +602,8 @@ with gr.Blocks(title="Editor de Cortes Automático", css=css) as demo:
616
  pad2 = gr.Number(value=0.08, label="Pad")
617
  ar_mode2 = gr.Dropdown(["Original","Vertical 9:16","Quadrado 1:1","Retrato 4:5"],
618
  value="Original", label="Formato")
619
- face_tracking2 = gr.Checkbox(label="Rastreamento facial", value=True)
620
- go_creative = gr.Button("3) Gerar Criativos", variant="primary")
621
  out_creative = gr.Files(label="Arquivos gerados")
622
  status_creative = gr.Textbox(label="Status", lines=2)
623
 
@@ -630,5 +616,4 @@ with gr.Blocks(title="Editor de Cortes Automático", css=css) as demo:
630
  outputs=[out_creative, status_creative])
631
 
632
  if __name__ == "__main__":
633
- # Fila para tarefas longas (compatível com Gradio 4)
634
- demo.queue(max_size=20).launch()
 
6
  import gradio as gr
7
  import cv2
8
  import numpy as np
9
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
10
  import whisper
11
  import subprocess
12
  from pathlib import Path
13
  from dataclasses import dataclass
14
+ from typing import List, Tuple, Optional
15
  import tempfile
16
  import os
17
  import shutil
 
 
18
 
19
  # ======================= DATACLASSES =======================
20
 
 
39
  center_y: int
40
  confidence: float = 1.0
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # ======================= FACE TRACKING =======================
43
 
44
  class FaceTracker:
 
61
 
62
  self.enabled = self.face_cascade is not None and not self.face_cascade.empty()
63
  if self.enabled:
64
+ print("Detector de rostos carregado")
65
  else:
66
+ print("⚠️ Detector de rostos não disponível - usando crop centralizado")
67
 
68
  def detect_faces(self, frame: np.ndarray) -> List[FaceBox]:
69
  if not self.enabled:
 
142
 
143
  return (crop_x, crop_y, crop_w, crop_h)
144
 
145
+ # ======================= TRANSCRIÇÃO =======================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
def transcribe(video_file: str, model_size: str = "small") -> List["Segment"]:
    """Transcribe ``video_file`` with Whisper and return its segments.

    Args:
        video_file: Path passed unchanged to ``model.transcribe``.
        model_size: Model name passed to ``whisper.load_model``.

    Returns:
        One ``Segment`` per entry of the result's ``"segments"`` list, with
        the text stripped of surrounding whitespace. The list is empty when
        the result carries no ``"segments"`` key.
    """
    print(f"🎙️ Carregando modelo Whisper: {model_size}")
    model = whisper.load_model(model_size)

    print(f"🎬 Transcrevendo: {video_file}")
    # The transcription language is pinned to Portuguese.
    result = model.transcribe(video_file, language="pt", verbose=False)

    # .get() keeps a result without "segments" from raising KeyError.
    segments = [
        Segment(start=raw["start"], end=raw["end"], text=raw["text"].strip())
        for raw in result.get("segments", [])
    ]

    print(f"✅ Transcrição completa: {len(segments)} segmentos")
    return segments
164
 
165
  # ======================= PROCESSAMENTO DE VÍDEO =======================
166
 
167
def extract_video_segment(input_video: str, output_video: str, start_time: float, end_time: float) -> bool:
    """Cut ``[start_time, end_time)`` out of ``input_video`` with ffmpeg.

    The segment is re-encoded (libx264 video, AAC audio) and written to
    ``output_video``, overwriting any existing file (``-y``).

    Args:
        input_video: Source file given to ffmpeg's ``-i``.
        output_video: Destination file.
        start_time: Seek position in seconds (``-ss``).
        end_time: End position in seconds; only ``end_time - start_time`` is
            passed to ffmpeg (``-t``).

    Returns:
        ``True`` when ffmpeg exits successfully. ``False`` when the requested
        duration is not positive or ffmpeg reports an error.
    """
    duration = end_time - start_time
    # Reject empty/inverted ranges up front instead of handing ffmpeg a
    # zero or negative -t value.
    if duration <= 0:
        print(f" Erro ao extrair: duração inválida ({duration})")
        return False

    cmd = [
        "ffmpeg", "-y", "-ss", str(start_time), "-i", input_video,
        "-t", str(duration), "-c:v", "libx264", "-c:a", "aac",
        "-strict", "experimental", output_video,
    ]

    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        print(f" Erro ao extrair: {e}")
        return False
    return True
181
 
182
  def apply_smart_crop_to_video(input_path: str, output_path: str, target_width: int,
183
  target_height: int, sample_frames: int = 10) -> bool:
 
184
  tracker = FaceTracker()
185
  cap = cv2.VideoCapture(input_path)
186
+
187
  if not cap.isOpened():
188
+ print(f" Erro ao abrir: {input_path}")
189
  return False
190
 
191
+ fps = int(cap.get(cv2.CAP_PROP_FPS))
192
  frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
193
  frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
194
  frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
195
 
196
+ # Amostragem para suavização
197
  sample_positions = []
198
+ frame_indices = np.linspace(0, frame_count - 1, min(sample_frames, frame_count), dtype=int)
199
+
200
  for idx in frame_indices:
201
  cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
202
  ret, frame = cap.read()
203
  if ret:
204
  crop_coords = tracker.calculate_smart_crop(frame, target_width, target_height)
205
  sample_positions.append(crop_coords)
 
206
 
207
+ # Posição média (suavizada)
208
  if sample_positions:
209
  avg_x = int(np.median([p[0] for p in sample_positions]))
210
  avg_y = int(np.median([p[1] for p in sample_positions]))
 
212
  crop_h = sample_positions[0][3]
213
  final_crop = (avg_x, avg_y, crop_w, crop_h)
214
  else:
215
+ # Fallback
216
  target_ar = target_width / target_height
217
  frame_ar = frame_w / frame_h
218
  if target_ar < frame_ar:
 
224
  crop_h = int(frame_w / target_ar)
225
  final_crop = (0, (frame_h - crop_h) // 2, crop_w, crop_h)
226
 
227
+ cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
 
228
 
229
+ # Writer
230
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
231
+ out = cv2.VideoWriter(output_path, fourcc, fps, (target_width, target_height))
232
+
233
+ if not out.isOpened():
234
+ print(f"❌ Erro ao criar saída: {output_path}")
235
+ cap.release()
 
 
 
 
 
 
 
 
236
  return False
237
+
238
+ print(f"🎬 Processando com crop: {final_crop}")
239
+ frame_num = 0
240
+
241
+ while True:
242
+ ret, frame = cap.read()
243
+ if not ret:
244
+ break
245
+
246
+ x, y, w, h = final_crop
247
+ cropped = frame[y:y+h, x:x+w]
248
+ resized = cv2.resize(cropped, (target_width, target_height), interpolation=cv2.INTER_LANCZOS4)
249
+ out.write(resized)
250
+ frame_num += 1
251
+
252
+ if frame_num % 30 == 0:
253
+ progress = (frame_num / frame_count) * 100
254
+ print(f" {progress:.1f}% ({frame_num}/{frame_count})")
255
+
256
+ cap.release()
257
+ out.release()
258
+ print(f"✅ Concluído: {output_path}")
259
+ return True
260
 
261
  def apply_aspect_ratio(input_video: str, output_video: str, ar_mode: str, face_tracking: bool = False) -> bool:
262
  if ar_mode == "Original":
 
268
  "Quadrado 1:1": (1080, 1080),
269
  "Retrato 4:5": (1080, 1350),
270
  }
271
+
272
  if ar_mode not in ar_dims:
273
  return False
274
 
275
  width, height = ar_dims[ar_mode]
276
+
277
  if face_tracking:
278
  return apply_smart_crop_to_video(input_video, output_video, width, height)
279
  else:
280
+ # Crop centralizado tradicional
281
  cmd = [
282
  "ffmpeg", "-y", "-i", input_video,
283
  "-vf", f"scale={width}:{height}:force_original_aspect_ratio=increase,crop={width}:{height}",
284
+ "-c:a", "copy", output_video
 
 
 
285
  ]
286
  try:
287
  subprocess.run(cmd, check=True, capture_output=True)
 
299
  f.write(f"file '{os.path.abspath(vf)}'\n")
300
 
301
  try:
302
+ cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", list_file, "-c", "copy", output_file]
303
  subprocess.run(cmd, check=True, capture_output=True)
304
  return True
305
  except subprocess.CalledProcessError:
306
+ return False
 
 
 
 
 
 
 
307
  finally:
308
  Path(list_file).unlink(missing_ok=True)
309
 
310
+ # ======================= ANÁLISE VIRAL (ESTILO OPSCLIP) =======================
311
+
312
def score_segment_virality(seg: "Segment", idx: int, total: int) -> float:
    """Score one transcript segment with additive keyword/shape heuristics.

    Keywords and phrases are matched as whole words, so "sem" no longer fires
    on "semana" nor "mas" on "problemas".

    Args:
        seg: Segment to score; ``text``, ``start`` and ``end`` are read.
        idx: Position of the segment in the transcript.
        total: Number of segments in the transcript.

    Returns:
        The sum of the bonuses below (0.0 when none apply):
        hook +15, each impact phrase +8, negation +5, imperative +7,
        any digit +6, duration 15-45s +20 (else 10-60s +10),
        position within the middle 20%-80% +10, closed sentence +8.
    """
    text = seg.text.lower()

    # Replace punctuation with spaces and pad both ends so that
    # " phrase " in padded is a whole-word (or whole-phrase) test.
    words = "".join(ch if ch.isalnum() else " " for ch in text).split()
    padded = f" {' '.join(words)} "

    def mentions(*phrases: str) -> bool:
        return any(f" {phrase} " in padded for phrase in phrases)

    score = 0.0

    # Hooks: questions and direct address. "?" is looked up in the raw text
    # because punctuation is stripped from the padded form.
    if "?" in text or mentions("por que", "qual", "como", "você"):
        score += 15

    # Impact phrases: every distinct phrase present adds its own bonus.
    impact_phrases = (
        "não dá", "problema", "esse é o", "imaginou", "é só",
        "mas", "porém", "entretanto", "então", "olha",
        "escuta", "presta atenção", "isso", "agora",
    )
    score += 8 * sum(1 for phrase in impact_phrases if mentions(phrase))

    # Negations and contrasts.
    if mentions("não", "nunca", "jamais", "sem"):
        score += 5

    # Imperatives / calls to action.
    if mentions("tem que", "precisa", "deve", "faça", "veja"):
        score += 7

    # Numbers and figures.
    if any(ch.isdigit() for ch in text):
        score += 6

    # Duration sweet spot, with a smaller bonus for the wider band.
    duration = seg.end - seg.start
    if 15 <= duration <= 45:
        score += 20
    elif 10 <= duration <= 60:
        score += 10

    # Favour the middle of the transcript over its first and last fifth.
    position_ratio = idx / max(1, total)
    if 0.2 <= position_ratio <= 0.8:
        score += 10

    # Completeness: the segment ends on closing punctuation or a tag word.
    if text.strip().endswith((".", "!", "?", "né", "tá")):
        score += 8

    return score
363
+
364
def find_viral_moments(segments: List["Segment"], k: int = 5, *,
                       min_duration: float = 10.0, max_duration: float = 60.0,
                       max_window: int = 5, scorer=None) -> List[Tuple[int, int, float]]:
    """Pick up to ``k`` non-overlapping runs of consecutive segments.

    Every run of 1..``max_window`` consecutive segments whose total span
    lies within ``[min_duration, max_duration]`` seconds is a candidate. A
    candidate's score is the sum of its segments' scores, plus 15 when the
    combined text contains a "?" together with one of "porque", "então",
    "mas" or "porém". Candidates are taken greedily from the highest score
    down, skipping any that shares a segment with one already taken.

    Args:
        segments: Transcript segments in order; ``start``, ``end`` and
            ``text`` are read.
        k: Maximum number of windows to return; ``k <= 0`` yields ``[]``.
        min_duration: Shortest accepted window span, in seconds.
        max_duration: Longest accepted window span, in seconds.
        max_window: Largest number of consecutive segments in one window.
        scorer: Callable ``(segment, index, total) -> float``. Defaults to
            ``score_segment_virality``.

    Returns:
        ``(start_idx, end_idx, score)`` tuples with inclusive indices, best
        score first.
    """
    # The original checked the limit only after appending, so k <= 0 still
    # returned one window.
    if k <= 0 or not segments:
        return []
    if scorer is None:
        scorer = score_segment_virality

    total = len(segments)
    # Score every segment once instead of once per window size.
    seg_scores = [scorer(seg, idx, total) for idx, seg in enumerate(segments)]

    candidates = []
    for size in range(1, max_window + 1):
        for first in range(total - size + 1):
            last = first + size - 1
            span = segments[last].end - segments[first].start
            if span < min_duration or span > max_duration:
                continue

            window_score = sum(seg_scores[first:last + 1])

            # Question plus connective is treated as a complete narrative.
            combined_text = " ".join(s.text for s in segments[first:last + 1])
            lowered = combined_text.lower()
            if "?" in combined_text and any(
                w in lowered for w in ("porque", "então", "mas", "porém")
            ):
                window_score += 15

            candidates.append((first, last, window_score))

    # sort() is stable, so equal scores keep generation order: smaller
    # windows first, then earlier start.
    candidates.sort(key=lambda item: item[2], reverse=True)

    selected = []
    used_indices = set()
    for first, last, window_score in candidates:
        covered = range(first, last + 1)
        if not used_indices.isdisjoint(covered):
            continue
        selected.append((first, last, window_score))
        used_indices.update(covered)
        if len(selected) >= k:
            break

    return selected
412
+
413
  # ======================= GERAÇÃO DE CORTES =======================
414
 
415
  def generate_linear_cuts(video_file: str, segments: List[Segment], output_dir: str,
 
421
 
422
  Path(output_dir).mkdir(parents=True, exist_ok=True)
423
  total_duration = segments[-1].end - segments[0].start
424
+ target_duration = min(max_len, max(min_len, total_duration / k))
425
 
426
  outputs = []
427
  current_start = segments[0].start
428
 
429
+ for i in range(k):
430
  target_end = current_start + target_duration
431
  best_end = target_end
432
 
 
444
  temp_file = Path(output_dir) / f"temp_linear_{i+1}.mp4"
445
  final_file = Path(output_dir) / f"cut_linear_{i+1}.mp4"
446
 
447
+ print(f"✂️ Corte {i+1}/{k}: {start_with_pad:.1f}s - {end_with_pad:.1f}s")
448
 
449
+ if extract_video_segment(video_file, str(temp_file), start_with_pad, end_with_pad):
 
450
  if ar_mode != "Original":
451
  if apply_aspect_ratio(str(temp_file), str(final_file), ar_mode, face_tracking):
452
+ temp_file.unlink()
453
  outputs.append(str(final_file))
454
  else:
455
+ temp_file.rename(final_file)
456
  outputs.append(str(final_file))
457
 
458
  current_start = best_end + gap_threshold
 
472
  Path(output_dir).mkdir(parents=True, exist_ok=True)
473
  outputs = []
474
 
475
+ import random
476
+ for i in range(k):
477
  num_blocks = random.randint(min_blocks, min(max_blocks, len(segments)))
478
  step = max(1, len(segments) // num_blocks)
479
  selected_indices = [j * step for j in range(num_blocks)]
 
484
  block_file = Path(output_dir) / f"temp_creative_{i+1}_block_{j+1}.mp4"
485
  start = max(0, seg.start - pad)
486
  end = seg.end + pad
487
+
488
+ if extract_video_segment(video_file, str(block_file), start, end):
489
  block_files.append(str(block_file))
490
 
491
  if not block_files:
 
497
 
498
  if ar_mode != "Original":
499
  if apply_aspect_ratio(str(concat_file), str(final_file), ar_mode, face_tracking):
500
+ concat_file.unlink()
501
  outputs.append(str(final_file))
502
  else:
503
+ concat_file.rename(final_file)
504
  outputs.append(str(final_file))
505
 
506
  for bf in block_files:
 
514
  SPACE_OUT.mkdir(exist_ok=True, parents=True)
515
 
516
def do_transcribe(video_file, model_size):
    """Transcribe ``video_file`` and build a short status text.

    Args:
        video_file: Path of the video to transcribe; ``None`` or an empty
            value means nothing was selected.
        model_size: Whisper model name passed on to ``transcribe``.

    Returns:
        ``(segments, message)``. With no video, ``segments`` is ``[]`` and the
        message asks for one. Otherwise the message reports the segment count
        and previews the first 12 segments.
    """
    # An empty string is as unusable as None; reject both before the model
    # is loaded.
    if not video_file:
        return [], "Selecione um vídeo."

    segs = transcribe(video_file, model_size=model_size)
    preview = "\n".join(f"[{s.start:.1f}–{s.end:.1f}] {s.text}" for s in segs[:12])
    return segs, f"Transcrição ok. Segmentos: {len(segs)}\n\nPrévia:\n{preview}"
522
 
 
524
  if not segs:
525
  return [], "Transcreva antes de cortar."
526
  workdir = SPACE_OUT / (out_subdir or "cortes")
527
+ outs = generate_linear_cuts(video_file, segs, str(workdir), min_len=min_len, max_len=max_len,
528
+ ideal_len=ideal_len, k=k, gap_threshold=gap, pad=pad,
529
+ ar_mode=ar_mode, face_tracking=face_tracking)
 
530
  return [str(Path(p)) for p in outs], f"Gerados: {len(outs)} arquivo(s)."
531
 
532
def run_creative(segs, video_file, out_subdir, min_len, max_len, ideal_len, minb, maxb, k, gap, pad, ar_mode, face_tracking):
    """Generate the "creative" cuts and report how many files were written.

    Args:
        segs: Transcript segments; an empty value aborts with a hint.
        video_file: Source video passed to ``generate_creative_cuts``.
        out_subdir: Folder name under ``SPACE_OUT``; falls back to "cortes".
        min_len, max_len, ideal_len: Length settings, forwarded as floats.
        minb, maxb: Block-count bounds, forwarded as ints.
        k: Number of cuts, forwarded as an int.
        gap, pad: Forwarded as floats (``gap_threshold`` / ``pad``).
        ar_mode: Aspect-ratio mode name, forwarded as a string.
        face_tracking: Forwarded as a bool.

    Returns:
        ``(paths, message)`` with the generated file paths as strings.
    """
    if not segs:
        return [], "Transcreva antes de cortar."

    workdir = SPACE_OUT / (out_subdir or "cortes")
    # Numeric inputs may arrive as floats (presumably from the gr.Number
    # fields); the generator feeds k to range() and the block bounds to
    # random.randint(), which need ints, so coerce here.
    outs = generate_creative_cuts(
        video_file, segs, str(workdir),
        min_len=float(min_len), max_len=float(max_len), ideal_len=float(ideal_len),
        min_blocks=int(minb), max_blocks=int(maxb), k=int(k),
        gap_threshold=float(gap), pad=float(pad),
        ar_mode=str(ar_mode), face_tracking=bool(face_tracking),
    )
    return [str(Path(p)) for p in outs], f"Gerados: {len(outs)} arquivo(s)."
541
 
542
  css = """
 
560
  gr.HTML("""
561
  <link href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;800&display=swap" rel="stylesheet">
562
  <div style="text-align: center; padding: 24px 0;">
563
+ <h1>🎬 Editor de Cortes Automático</h1>
564
  <p style="color: #6b7280;">Gere cortes com rastreamento facial inteligente</p>
565
  </div>
566
  """)
 
571
  with gr.Row():
572
  model_size = gr.Dropdown(["tiny","base","small","medium"], value="small", label="Modelo Whisper")
573
  out_subdir = gr.Textbox(label="Pasta de saída", value="cortes")
574
+ transcribe_btn = gr.Button("🎙️ 1) Transcrever", variant="primary")
575
  transcript_preview = gr.Textbox(label="Status", lines=10)
576
 
577
  with gr.Column():
578
+ with gr.Tab("✂️ Cortes Simples"):
579
  with gr.Row():
580
  min_len = gr.Number(value=600, label="Min (s)")
581
  max_len = gr.Number(value=900, label="Max (s)")
 
587
  pad = gr.Number(value=0.08, label="Pad")
588
  ar_mode = gr.Dropdown(["Original","Vertical 9:16","Quadrado 1:1","Retrato 4:5"],
589
  value="Original", label="Formato")
590
+ face_tracking = gr.Checkbox(label="👤 Rastreamento facial", value=True)
591
+ go_linear = gr.Button("🚀 2) Gerar Cortes", variant="primary")
592
  out_linear = gr.Files(label="Arquivos gerados")
593
  status_linear = gr.Textbox(label="Status", lines=2)
594
 
595
+ with gr.Tab("🎨 Cortes Criativos"):
596
  with gr.Row():
597
  minb = gr.Number(value=3, label="Blocos min")
598
  maxb = gr.Number(value=8, label="Blocos max")
 
602
  pad2 = gr.Number(value=0.08, label="Pad")
603
  ar_mode2 = gr.Dropdown(["Original","Vertical 9:16","Quadrado 1:1","Retrato 4:5"],
604
  value="Original", label="Formato")
605
+ face_tracking2 = gr.Checkbox(label="👤 Rastreamento facial", value=True)
606
+ go_creative = gr.Button("🎬 3) Gerar Criativos", variant="primary")
607
  out_creative = gr.Files(label="Arquivos gerados")
608
  status_creative = gr.Textbox(label="Status", lines=2)
609
 
 
616
  outputs=[out_creative, status_creative])
617
 
618
  if __name__ == "__main__":
619
+ demo.launch()