leicam committed on
Commit
fbd839e
·
verified ·
1 Parent(s): cb2bcf1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +460 -290
app.py CHANGED
@@ -1,16 +1,419 @@
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
2
  from pathlib import Path
3
- import shutil
 
 
4
  import os
5
- from core import transcribe, generate_linear_cuts, generate_creative_cuts, Segment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- SPACE_OUT = Path("outputs"); SPACE_OUT.mkdir(exist_ok=True, parents=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  def do_transcribe(video_file, model_size):
10
  if video_file is None:
11
  return [], "Selecione um vídeo."
12
  segs = transcribe(video_file, model_size=model_size)
13
- # show a small preview of transcript
14
  preview = "\n".join([f"[{s.start:.1f}–{s.end:.1f}] {s.text}" for s in segs[:12]])
15
  return segs, f"Transcrição ok. Segmentos: {len(segs)}\n\nPrévia:\n{preview}"
16
 
@@ -18,329 +421,96 @@ def run_linear(segs, video_file, out_subdir, min_len, max_len, ideal_len, k, gap
18
  if not segs:
19
  return [], "Transcreva antes de cortar."
20
  workdir = SPACE_OUT / (out_subdir or "cortes")
21
- outs = generate_linear_cuts(video_file, segs, str(workdir),
22
- min_len=min_len, max_len=max_len, ideal_len=ideal_len,
23
- k=k, gap_threshold=gap, pad=pad, ar_mode=ar_mode,
24
- face_tracking=face_tracking)
25
- links = [str(Path(p)) for p in outs]
26
- return links, f"Gerados: {len(links)} arquivo(s)."
27
 
28
  def run_creative(segs, video_file, out_subdir, min_len, max_len, ideal_len, minb, maxb, k, gap, pad, ar_mode, face_tracking):
29
  if not segs:
30
  return [], "Transcreva antes de cortar."
31
  workdir = SPACE_OUT / (out_subdir or "cortes")
32
- outs = generate_creative_cuts(video_file, segs, str(workdir),
33
- min_len=min_len, max_len=max_len, ideal_len=ideal_len,
34
- min_blocks=minb, max_blocks=maxb,
35
  k=k, gap_threshold=gap, pad=pad, ar_mode=ar_mode,
36
  face_tracking=face_tracking)
37
- links = [str(Path(p)) for p in outs]
38
- return links, f"Gerados: {len(links)} arquivo(s)."
39
 
40
  css = """
41
- /* Design Tokens */
42
  :root {
43
  --neon: #39FF14;
44
- --txt: #0a0a0a; /* texto escuro */
45
- --muted: #374151; /* texto secundário mais escuro p/ legibilidade */
46
  --line: #e5e7eb;
47
- --bg: #ffffff; /* fundo claro */
48
- }
49
-
50
- /* Força esquema claro mesmo se o Gradio estiver em dark */
51
- html, body, .gradio-container {
52
- background: var(--bg) !important;
53
- color: var(--txt) !important;
54
- }
55
- html[data-theme="dark"], .dark, .theme-dark {
56
- --txt: #0a0a0a !important;
57
- --muted: #374151 !important;
58
- --bg: #ffffff !important;
59
- --line: #e5e7eb !important;
60
- }
61
-
62
- /* Global Styles */
63
- .gradio-container {
64
- font-family: 'Manrope', system-ui, -apple-system, sans-serif !important;
65
- background: linear-gradient(135deg, rgba(57,255,20,0.03) 0%, rgba(255,255,255,1) 100%) !important;
66
- background-attachment: fixed !important;
67
- }
68
-
69
- /* Tipografia: garante texto visível em todos os elementos comuns do Gradio */
70
- .gradio-container,
71
- .gradio-container *:where(p, span, label, strong, em, small, b, i) {
72
- color: var(--txt) !important;
73
- }
74
- .gradio-container .gr-prose,
75
- .gradio-container .gr-prose * ,
76
- .gradio-container .prose,
77
- .gradio-container .prose * {
78
- color: var(--txt) !important;
79
- }
80
- .gradio-container h1, .gradio-container h2, .gradio-container h3 {
81
- font-weight: 800 !important;
82
- letter-spacing: -0.3px !important;
83
- color: var(--txt) !important;
84
- }
85
- .gradio-container h1 { font-size: clamp(28px, 5vw, 46px) !important; margin-bottom: 8px !important; }
86
-
87
- /* Texto secundário (parágrafos explicativos) */
88
- .gradio-container .gr-prose p,
89
- .gradio-container .prose p {
90
- color: var(--muted) !important;
91
- line-height: 1.65 !important;
92
- font-size: 16px !important;
93
- }
94
-
95
- /* Inputs, Textareas, Dropdowns */
96
- .gradio-container input,
97
- .gradio-container textarea,
98
- .gradio-container select,
99
- .gradio-container .wrap,
100
- .gradio-container .gr-textbox,
101
- .gradio-container .gr-dropdown,
102
- .gradio-container .gr-number {
103
- border: 1px solid var(--line) !important;
104
- border-radius: 12px !important;
105
- background: #fff !important;
106
- color: var(--txt) !important;
107
- transition: all 0.2s ease !important;
108
  }
109
- .gradio-container ::placeholder { color: #6b7280 !important; opacity: 1 !important; }
110
- .gradio-container input:focus,
111
- .gradio-container textarea:focus,
112
- .gradio-container select:focus {
113
- border-color: #cbd5e1 !important;
114
- box-shadow: 0 0 0 3px rgba(57,255,20,0.16) !important;
115
- }
116
-
117
- /* Labels */
118
- .gradio-container label {
119
- font-weight: 600 !important;
120
- color: var(--txt) !important;
121
- }
122
-
123
- /* Cards/Panels */
124
- .gradio-container .block {
125
- border: 1px solid var(--line) !important;
126
- border-radius: 16px !important;
127
- background: #fff !important;
128
- box-shadow: 0 2px 8px rgba(0,0,0,0.06) !important;
129
- transition: all 0.2s ease !important;
130
- }
131
- .gradio-container .block:hover { box-shadow: 0 6px 16px rgba(0,0,0,0.08) !important; }
132
-
133
- /* Buttons */
134
  .gradio-container button.primary {
135
- background: var(--neon) !important;
136
- color: #000 !important;
137
- border: none !important;
138
- border-radius: 10px !important;
139
- font-weight: 800 !important;
140
- padding: 12px 20px !important;
141
- box-shadow: 0 2px 0 rgba(0,0,0,0.12), 0 10px 30px rgba(57,255,20,0.18) !important;
142
- transition: all 0.2s ease !important;
143
- }
144
- .gradio-container button.primary:hover { transform: translateY(-1px) !important; filter: saturate(1.03) !important; }
145
- .gradio-container button:not(.primary) {
146
- background: #fff !important;
147
- border: 1px solid var(--line) !important;
148
- border-radius: 10px !important;
149
- color: var(--txt) !important;
150
- font-weight: 600 !important;
151
- }
152
-
153
- /* Tabs */
154
- .gradio-container .tabs { border-radius: 12px !important; }
155
- .gradio-container .tab-nav button { border-radius: 8px !important; font-weight: 600 !important; color: var(--txt) !important; }
156
- .gradio-container .tab-nav button.selected { background: var(--neon) !important; color: #000 !important; }
157
-
158
- /* Checkboxes */
159
- .gradio-container input[type="checkbox"] { accent-color: var(--neon) !important; }
160
- .gradio-container input[type="checkbox"]:checked {
161
- background: var(--neon) !important; border-color: var(--neon) !important;
162
- }
163
-
164
- /* Vídeo, upload e containers */
165
- .gradio-container video { border-radius: 12px !important; box-shadow: 0 4px 12px rgba(0,0,0,0.1) !important; }
166
- .gradio-container .upload-container {
167
- border: 2px dashed var(--line) !important; border-radius: 12px !important; background: #fafafa !important;
168
  }
169
-
170
- /* Números */
171
- .gradio-container input[type="number"] { font-weight: 600 !important; }
172
-
173
- /* Container spacing */
174
- .gradio-container .contain { max-width: 1200px !important; margin: 0 auto !important; }
175
  """
176
 
177
-
178
- with gr.Blocks(title="Editor de cortes automático", css=css) as demo:
179
  gr.HTML("""
180
- <link href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;700;800&display=swap" rel="stylesheet">
181
- <div style="text-align: center; padding: 24px 0 16px;">
182
- <div style="display: inline-flex; align-items: center; gap: 8px; margin-bottom: 12px;">
183
- <div style="width: 12px; height: 12px; border-radius: 50%; background: #39FF14; box-shadow: 0 0 20px rgba(57,255,20,0.4);"></div>
184
- <h1 style="margin: 0; font-weight: 800; letter-spacing: -0.4px;">Editor de Cortes Automático</h1>
185
- </div>
186
- <p style="color: #6b7280; max-width: 720px; margin: 0 auto; line-height: 1.65;">
187
- Gere cortes criativos ou trechos a partir de qualquer vídeo com <strong>rastreamento facial inteligente</strong>.
188
- </p>
189
  </div>
190
  """)
191
 
192
  with gr.Row():
193
- with gr.Column(scale=1):
194
- gr.HTML("""<div style="background: linear-gradient(135deg, #f9fafb 0%, #fff 100%);
195
- padding: 16px; border-radius: 16px; border: 1px solid #e5e7eb; margin-bottom: 16px;">
196
- <div style="font-weight: 700; color: #0a0a0a; margin-bottom: 8px;">🎬 Entrada</div>
197
- <p style="color: #6b7280; font-size: 14px; margin: 0;">Envie seu vídeo e configure as opções</p>
198
- </div>""")
199
-
200
  video = gr.Video(label="Vídeo de entrada", interactive=True)
201
-
202
  with gr.Row():
203
- model_size = gr.Dropdown(
204
- choices=["tiny","base","small","medium"],
205
- value="small",
206
- label="Modelo Whisper",
207
- info="Quanto maior, mais preciso mas mais lento"
208
- )
209
- out_subdir = gr.Textbox(
210
- label="Subpasta de saída",
211
- value="editor_de_cortes_automatico",
212
- info="Nome da pasta onde os cortes serão salvos"
213
- )
214
-
215
- transcribe_btn = gr.Button("🎙️ 1) Transcrever Vídeo", variant="primary", size="lg")
216
- transcript_preview = gr.Textbox(label="Status / Prévia da Transcrição", lines=10)
217
 
218
- with gr.Column(scale=1):
219
- gr.HTML("""<div style="background: linear-gradient(135deg, rgba(57,255,20,0.08) 0%, rgba(57,255,20,0.02) 100%);
220
- padding: 16px; border-radius: 16px; border: 1px solid #e5e7eb; margin-bottom: 16px;">
221
- <div style="font-weight: 700; color: #0a0a0a; margin-bottom: 8px;">⚙️ Configurações de Corte</div>
222
- <p style="color: #6b7280; font-size: 14px; margin: 0;">Escolha entre cortes simples ou criativos</p>
223
- </div>""")
224
-
225
  with gr.Tab("✂️ Cortes Simples"):
226
- gr.HTML("""<p style="color: #6b7280; font-size: 14px; margin-bottom: 16px;">
227
- Cortes lineares e contínuos do vídeo original</p>""")
228
-
229
  with gr.Row():
230
- min_len = gr.Number(value=600, label="⏱️ Duração mínima (s)", info="Mínimo de segundos por corte")
231
- max_len = gr.Number(value=900, label="⏱️ Duração máxima (s)", info="Máximo de segundos por corte")
232
-
233
  with gr.Row():
234
- ideal_len = gr.Number(value=900, label="🎯 Duração ideal (s)", info="Tamanho preferencial")
235
- k = gr.Number(value=2, label="📊 Quantidade de cortes", info="Quantos vídeos gerar")
236
-
237
  with gr.Row():
238
- gap = gr.Number(value=0.60, label="Gap (s)", info="Intervalo entre frases")
239
- pad = gr.Number(value=0.08, label="Pad (s)", info="Margem extra")
240
-
241
- ar_mode = gr.Dropdown(
242
- choices=["Original","Vertical 9:16","Quadrado 1:1","Retrato 4:5"],
243
- value="Original",
244
- label="📐 Formato de vídeo"
245
- )
246
-
247
- face_tracking = gr.Checkbox(
248
- label="👤 Ativar rastreamento facial no crop",
249
- value=True,
250
- info="Detecta e centraliza rostos automaticamente ao redimensionar"
251
- )
252
-
253
- gr.HTML("""<div style="background: #ecfdf5; padding: 12px; border-radius: 10px; border: 1px solid #a7f3d0; margin: 12px 0;">
254
- <strong style="color: #065f46;">💡 Dica:</strong>
255
- <p style="color: #047857; font-size: 13px; margin: 6px 0 0;">
256
- O rastreamento facial mantém a pessoa sempre centralizada ao cortar para 9:16 ou 1:1
257
- </p>
258
- </div>""")
259
-
260
- go_linear = gr.Button("🚀 2) Gerar Cortes Simples", variant="primary")
261
- out_linear = gr.Files(label="📦 Arquivos gerados (simples)")
262
  status_linear = gr.Textbox(label="Status", lines=2)
263
 
264
  with gr.Tab("🎨 Cortes Criativos"):
265
- gr.HTML("""<p style="color: #6b7280; font-size: 14px; margin-bottom: 16px;">
266
- Montagens com múltiplos blocos e transições dinâmicas</p>""")
267
-
268
- with gr.Row():
269
- minb = gr.Number(value=3, label="🧩 Blocos mínimos", info="Mínimo de segmentos por vídeo")
270
- maxb = gr.Number(value=8, label="🧩 Blocos máximos", info="Máximo de segmentos por vídeo")
271
-
272
  with gr.Row():
273
- k2 = gr.Number(value=2, label="📊 Quantidade de cortes")
274
- gap2 = gr.Number(value=0.60, label="Gap (s)")
275
-
276
  with gr.Row():
277
- pad2 = gr.Number(value=0.08, label="Pad (s)")
278
- ar_mode2 = gr.Dropdown(
279
- choices=["Original","Vertical 9:16","Quadrado 1:1","Retrato 4:5"],
280
- value="Original",
281
- label="📐 Formato"
282
- )
283
-
284
- face_tracking2 = gr.Checkbox(
285
- label="👤 Ativar rastreamento facial no crop",
286
- value=True,
287
- info="Detecta e centraliza rostos automaticamente"
288
- )
289
-
290
- gr.HTML("""<div style="background: #fef3c7; padding: 12px; border-radius: 10px; border: 1px solid #fcd34d; margin: 12px 0;">
291
- <strong style="color: #92400e;">⚡ Cortes Criativos:</strong>
292
- <p style="color: #78350f; font-size: 13px; margin: 6px 0 0;">
293
- Combina diferentes momentos do vídeo em uma montagem dinâmica
294
- </p>
295
- </div>""")
296
-
297
- go_creative = gr.Button("🎬 3) Gerar Cortes Criativos", variant="primary")
298
- out_creative = gr.Files(label="📦 Arquivos gerados (criativos)")
299
  status_creative = gr.Textbox(label="Status", lines=2)
300
-
301
- segs_state = gr.State([])
302
-
303
- transcribe_btn.click(
304
- do_transcribe,
305
- inputs=[video, model_size],
306
- outputs=[segs_state, transcript_preview],
307
- )
308
-
309
- go_linear.click(
310
- run_linear,
311
- inputs=[segs_state, video, out_subdir, min_len, max_len, ideal_len, k, gap, pad, ar_mode, face_tracking],
312
- outputs=[out_linear, status_linear],
313
- )
314
-
315
- go_creative.click(
316
- run_creative,
317
- inputs=[segs_state, video, out_subdir, min_len, max_len, ideal_len, minb, maxb, k2, gap2, pad2, ar_mode2, face_tracking2],
318
- outputs=[out_creative, status_creative],
319
- )
320
 
321
- gr.HTML("""
322
- <div style="margin-top: 32px; padding: 20px; background: #f9fafb; border-radius: 16px; border: 1px solid #e5e7eb;">
323
- <h3 style="margin: 0 0 12px; font-weight: 700; color: #0a0a0a;">💡 Como funciona o rastreamento facial</h3>
324
- <ul style="color: #6b7280; line-height: 1.65; padding-left: 20px; margin: 0;">
325
- <li><strong>Detecção automática:</strong> O sistema identifica rostos em cada frame do vídeo</li>
326
- <li><strong>Crop inteligente:</strong> Ao redimensionar para 9:16 ou 1:1, mantém o rosto centralizado</li>
327
- <li><strong>Múltiplos rostos:</strong> Se houver várias pessoas, prioriza o rosto mais central/próximo</li>
328
- <li><strong>Fallback:</strong> Se nenhum rosto for detectado, usa crop centralizado tradicional</li>
329
- </ul>
330
- </div>
331
- """)
332
 
333
- gr.HTML("""
334
- <footer style="margin-top: 40px; padding: 24px 0; border-top: 1px solid #e5e7eb; text-align: center;">
335
- <div style="display: inline-flex; align-items: center; gap: 8px; margin-bottom: 8px;">
336
- <div style="width: 10px; height: 10px; border-radius: 50%; background: #39FF14;"></div>
337
- <span style="font-weight: 700; color: #0a0a0a;">Leicam · Tech</span>
338
- </div>
339
- <p style="color: #6b7280; font-size: 13px; margin: 0;">
340
- Ferramentas práticas para produção de conteúdo
341
- </p>
342
- </footer>
343
- """)
344
 
345
  if __name__ == "__main__":
346
  demo.launch()
 
1
+ """
2
+ Video Clip Generator - Tudo integrado
3
+ Transcrição + Cortes + Face Tracking
4
+ """
5
+
6
  import gradio as gr
7
+ import cv2
8
+ import numpy as np
9
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
10
+ import whisper
11
+ import subprocess
12
  from pathlib import Path
13
+ from dataclasses import dataclass
14
+ from typing import List, Tuple, Optional
15
+ import tempfile
16
  import os
17
+ import shutil
18
+
19
+ # ======================= DATACLASSES =======================
20
+
21
@dataclass
class Segment:
    """A transcription segment with its start/end timestamps.

    Attributes:
        start: Segment start time (passed to ffmpeg as seconds elsewhere in this file).
        end: Segment end time, same unit as ``start``.
        text: Transcribed text of the segment.
    """
    start: float
    end: float
    text: str

    def __repr__(self):
        # Only show an ellipsis when the text was actually cut at 50 chars;
        # previously "..." was appended even to short, complete texts.
        preview = self.text if len(self.text) <= 50 else f"{self.text[:50]}..."
        return f"Segment({self.start:.1f}-{self.end:.1f}: {preview})"
30
+
31
@dataclass
class FaceBox:
    """A single detected face: bounding box plus its center point."""
    x: int  # x of the box origin, from the detector's (x, y, w, h) tuple
    y: int  # y of the box origin, from the detector's (x, y, w, h) tuple
    w: int  # box width
    h: int  # box height
    center_x: int  # horizontal center of the box (x + w // 2 where boxes are built)
    center_y: int  # vertical center of the box (y + h // 2 where boxes are built)
    confidence: float = 1.0  # detection confidence; defaults to 1.0 when not supplied
41
+
42
+ # ======================= FACE TRACKING =======================
43
+
44
class FaceTracker:
    """Haar-cascade face detector used to position crops around a face.

    When no cascade can be loaded, ``enabled`` is False, ``detect_faces``
    returns an empty list and every crop falls back to a centered crop.
    """

    def __init__(self):
        """Load the first usable frontal-face Haar cascade shipped with OpenCV."""
        cascade_names = (
            'haarcascade_frontalface_default.xml',
            'haarcascade_frontalface_alt.xml',
        )

        self.face_cascade = None
        for name in cascade_names:
            try:
                candidate = cv2.CascadeClassifier(cv2.data.haarcascades + name)
            except Exception as exc:
                # Loading is best effort, but a bare ``except:`` would also
                # swallow KeyboardInterrupt/SystemExit; report and try the next.
                print(f"⚠️ Falha ao carregar {name}: {exc}")
                continue
            if not candidate.empty():
                self.face_cascade = candidate
                break

        self.enabled = self.face_cascade is not None
        if self.enabled:
            print("✅ Detector de rostos carregado")
        else:
            print("⚠️ Detector de rostos não disponível - usando crop centralizado")

    def detect_faces(self, frame: np.ndarray) -> List[FaceBox]:
        """Return one FaceBox per face found in ``frame``.

        The frame is converted with ``COLOR_BGR2GRAY``, so it is expected in
        BGR channel order. Returns ``[]`` when the detector is disabled.
        """
        if not self.enabled:
            return []

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = self.face_cascade.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5,
            minSize=(30, 30), flags=cv2.CASCADE_SCALE_IMAGE
        )

        face_boxes = []
        for (x, y, w, h) in faces:
            # The detector yields numpy integers; store plain ints as annotated.
            x, y, w, h = int(x), int(y), int(w), int(h)
            face_boxes.append(FaceBox(x, y, w, h, x + w // 2, y + h // 2))

        return face_boxes

    def get_primary_face(self, faces: List[FaceBox], frame_width: int, frame_height: int) -> Optional[FaceBox]:
        """Pick the face to follow, or None when ``faces`` is empty.

        With several faces, each is scored as 30% relative area plus 70%
        closeness to the frame center; the highest score wins and the first
        face wins ties.
        """
        if not faces:
            return None
        if len(faces) == 1:
            return faces[0]

        frame_center_x = frame_width / 2
        frame_center_y = frame_height / 2

        def score(face):
            size_score = (face.w * face.h) / (frame_width * frame_height)
            dx = abs(face.center_x - frame_center_x) / frame_width
            dy = abs(face.center_y - frame_center_y) / frame_height
            center_score = 1 - (dx + dy) / 2
            return (size_score * 0.3) + (center_score * 0.7)

        return max(faces, key=score)

    def calculate_smart_crop(self, frame: np.ndarray, target_width: int, target_height: int) -> Tuple[int, int, int, int]:
        """Compute a crop rectangle with the target aspect ratio for ``frame``.

        Returns ``(crop_x, crop_y, crop_w, crop_h)``. The crop spans the full
        frame along one axis and slides along the other: it follows the
        primary face when one is detected and is centered otherwise.
        """
        frame_h, frame_w = frame.shape[:2]
        primary_face = self.get_primary_face(self.detect_faces(frame), frame_w, frame_h)

        target_ar = target_width / target_height
        frame_ar = frame_w / frame_h

        if target_ar < frame_ar:
            # Target is narrower than the frame: keep full height, slide in x.
            crop_w, crop_h = int(frame_h * target_ar), frame_h
            crop_y = 0
            if primary_face is not None:
                crop_x = max(0, min(primary_face.center_x - crop_w // 2, frame_w - crop_w))
            else:
                crop_x = (frame_w - crop_w) // 2
        else:
            # Target is as wide as or wider than the frame: keep full width, slide in y.
            crop_w, crop_h = frame_w, int(frame_w / target_ar)
            crop_x = 0
            if primary_face is not None:
                # Shift the window up by 10% of its height relative to a
                # face-centered position, then clamp it inside the frame.
                offset = int(crop_h * 0.1)
                crop_y = max(0, min(primary_face.center_y - crop_h // 2 - offset, frame_h - crop_h))
            else:
                crop_y = (frame_h - crop_h) // 2

        return (crop_x, crop_y, crop_w, crop_h)
144
+
145
+ # ======================= TRANSCRIÇÃO =======================
146
+
147
def transcribe(video_file: str, model_size: str = "small",
               language: Optional[str] = "pt") -> List[Segment]:
    """Transcribe a media file with Whisper and return its segments.

    Args:
        video_file: Path of the file handed to Whisper's ``transcribe``.
        model_size: Name of the Whisper model to load (e.g. "tiny", "small").
        language: Language code forwarded to Whisper. Defaults to "pt", the
            value that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        One ``Segment`` per entry in Whisper's ``"segments"`` result, with
        surrounding whitespace stripped from the text.
    """
    print(f"🎙️ Carregando modelo Whisper: {model_size}")
    model = whisper.load_model(model_size)

    print(f"🎬 Transcrevendo: {video_file}")
    result = model.transcribe(video_file, language=language, verbose=False)

    segments = [
        Segment(start=seg["start"], end=seg["end"], text=seg["text"].strip())
        for seg in result["segments"]
    ]

    print(f"✅ Transcrição completa: {len(segments)} segmentos")
    return segments
164
 
165
+ # ======================= PROCESSAMENTO DE VÍDEO =======================
166
+
167
def extract_video_segment(input_video: str, output_video: str, start_time: float, end_time: float) -> bool:
    """Cut ``[start_time, end_time]`` out of ``input_video`` with ffmpeg.

    The segment is re-encoded (libx264 video, AAC audio) into ``output_video``.

    Args:
        input_video: Source file path.
        output_video: Destination file path (overwritten, ``-y``).
        start_time: Start offset, passed to ffmpeg's ``-ss``.
        end_time: End offset; must be greater than ``start_time``.

    Returns:
        True when ffmpeg succeeded. False for an empty or negative interval,
        when ffmpeg exits with an error, or when ffmpeg cannot be launched.
    """
    duration = end_time - start_time
    if duration <= 0:
        # ffmpeg would fail or emit an empty file for a non-positive duration.
        print(f"❌ Erro ao extrair: intervalo inválido ({start_time} - {end_time})")
        return False

    cmd = [
        "ffmpeg", "-y", "-ss", str(start_time), "-i", input_video,
        "-t", str(duration), "-c:v", "libx264", "-c:a", "aac",
        "-strict", "experimental", output_video
    ]

    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        # Include the tail of ffmpeg's stderr; the exception alone only
        # carries the exit status.
        stderr_tail = (e.stderr or b"").decode("utf-8", errors="replace")[-500:]
        print(f"❌ Erro ao extrair: {e}\n{stderr_tail}")
        return False
    except OSError as e:
        # ffmpeg missing or not executable: report failure like any other error.
        print(f"❌ Erro ao extrair: {e}")
        return False
    return True
181
+
182
def apply_smart_crop_to_video(input_path: str, output_path: str, target_width: int,
                              target_height: int, sample_frames: int = 10) -> bool:
    """Crop a video around the detected face and scale it to the target size.

    Up to ``sample_frames`` frames, evenly spread over the clip, are analysed
    with ``FaceTracker.calculate_smart_crop``; the median x/y of those crops
    gives one static crop window for the whole clip (a centered crop is used
    when no frame could be read). The crop and scale are then rendered by
    ffmpeg, which keeps the audio stream and the source frame rate. The
    previous ``cv2.VideoWriter`` path produced a silent file and truncated
    the frame rate to an integer.

    Args:
        input_path: Source video path.
        output_path: Destination path (overwritten).
        target_width: Output width in pixels.
        target_height: Output height in pixels.
        sample_frames: Maximum number of frames to analyse.

    Returns:
        True on success; False when the input cannot be opened, its
        dimensions are unknown, or ffmpeg fails or is unavailable.
    """
    tracker = FaceTracker()
    cap = cv2.VideoCapture(input_path)

    if not cap.isOpened():
        print(f"❌ Erro ao abrir: {input_path}")
        return False

    sample_positions = []
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

        # Guard against containers that report a zero/negative frame count.
        n_samples = max(0, min(sample_frames, frame_count))
        for idx in np.linspace(0, frame_count - 1, n_samples, dtype=int):
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if ret:
                sample_positions.append(
                    tracker.calculate_smart_crop(frame, target_width, target_height)
                )
    finally:
        # Always free the capture, even if detection raises.
        cap.release()

    if sample_positions:
        # The median is robust to a few frames with missed or spurious detections.
        crop_x = int(np.median([p[0] for p in sample_positions]))
        crop_y = int(np.median([p[1] for p in sample_positions]))
        crop_w = sample_positions[0][2]
        crop_h = sample_positions[0][3]
    else:
        if frame_w <= 0 or frame_h <= 0:
            print(f"❌ Erro ao abrir: {input_path}")
            return False
        # Fallback: centered crop computed from the container metadata.
        target_ar = target_width / target_height
        frame_ar = frame_w / frame_h
        if target_ar < frame_ar:
            crop_w, crop_h = int(frame_h * target_ar), frame_h
            crop_x, crop_y = (frame_w - crop_w) // 2, 0
        else:
            crop_w, crop_h = frame_w, int(frame_w / target_ar)
            crop_x, crop_y = 0, (frame_h - crop_h) // 2

    final_crop = (crop_x, crop_y, crop_w, crop_h)
    print(f"🎬 Processando com crop: {final_crop}")

    # setsar=1 keeps square pixels after the (slightly non-uniform) scale,
    # matching what a plain pixel resize produces.
    video_filter = (
        f"crop={crop_w}:{crop_h}:{crop_x}:{crop_y},"
        f"scale={target_width}:{target_height}:flags=lanczos,setsar=1"
    )
    cmd = [
        "ffmpeg", "-y", "-i", input_path,
        "-vf", video_filter,
        "-c:v", "libx264", "-c:a", "copy", output_path
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"❌ Erro ao criar saída: {output_path} ({e})")
        return False

    print(f"✅ Concluído: {output_path}")
    return True
260
+
261
def apply_aspect_ratio(input_video: str, output_video: str, ar_mode: str, face_tracking: bool = False) -> bool:
    """Write ``input_video`` to ``output_video`` in the requested format.

    Args:
        input_video: Source video path.
        output_video: Destination path.
        ar_mode: "Original" (plain copy), "Vertical 9:16", "Quadrado 1:1"
            or "Retrato 4:5".
        face_tracking: When True, non-original formats are cropped with
            ``apply_smart_crop_to_video``; otherwise ffmpeg does a centered
            scale-and-crop.

    Returns:
        True on success. False for an unknown ``ar_mode`` or any copy/ffmpeg
        failure (including a missing ffmpeg binary), so callers can rely on
        the boolean instead of catching exceptions.
    """
    if ar_mode == "Original":
        try:
            shutil.copy(input_video, output_video)
        except shutil.SameFileError:
            # Source and destination are the same file: already in place.
            return True
        except OSError as e:
            print(f"❌ Erro ao copiar: {e}")
            return False
        return True

    ar_dims = {
        "Vertical 9:16": (1080, 1920),
        "Quadrado 1:1": (1080, 1080),
        "Retrato 4:5": (1080, 1350),
    }

    if ar_mode not in ar_dims:
        print(f"❌ Formato desconhecido: {ar_mode}")
        return False

    width, height = ar_dims[ar_mode]

    if face_tracking:
        return apply_smart_crop_to_video(input_video, output_video, width, height)

    # Traditional centered crop: scale up until the frame covers the target,
    # then crop the overflow.
    cmd = [
        "ffmpeg", "-y", "-i", input_video,
        "-vf", f"scale={width}:{height}:force_original_aspect_ratio=increase,crop={width}:{height}",
        "-c:a", "copy", output_video
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"❌ Erro ao converter formato: {e}")
        return False
    return True
291
+
292
def concatenate_videos(video_files: List[str], output_file: str) -> bool:
    """Join ``video_files`` in order into ``output_file`` via ffmpeg's concat demuxer.

    Streams are copied (``-c copy``), not re-encoded. A temporary list file
    is written for ffmpeg and always removed afterwards.

    Returns:
        True on success; False when ``video_files`` is empty, ffmpeg fails,
        or ffmpeg cannot be launched.
    """
    if not video_files:
        return False

    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False,
                                     encoding='utf-8') as f:
        list_file = f.name
        for vf in video_files:
            # Inside a single-quoted concat entry a literal quote must be
            # written as '\'' or ffmpeg misparses the path.
            escaped = os.path.abspath(vf).replace("'", "'\\''")
            f.write(f"file '{escaped}'\n")

    try:
        cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", list_file, "-c", "copy", output_file]
        subprocess.run(cmd, check=True, capture_output=True)
        return True
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"❌ Erro ao concatenar: {e}")
        return False
    finally:
        Path(list_file).unlink(missing_ok=True)
309
+
310
+ # ======================= GERAÇÃO DE CORTES =======================
311
+
312
def generate_linear_cuts(video_file: str, segments: List[Segment], output_dir: str,
                         min_len: float = 600, max_len: float = 900, ideal_len: float = 900,
                         k: int = 2, gap_threshold: float = 0.60, pad: float = 0.08,
                         ar_mode: str = "Original", face_tracking: bool = False) -> List[str]:
    """Cut up to ``k`` consecutive clips out of ``video_file``.

    Each clip aims for ``total_duration / k`` clamped to
    ``[min_len, max_len]``. Its end snaps to the first segment end that lies
    within ``gap_threshold`` of that target. The next clip starts
    ``gap_threshold`` after the previous one ended.

    Args:
        video_file: Source video path.
        segments: Transcript segments (only ``start``/``end`` are read).
        output_dir: Directory for the clips; created if needed.
        min_len: Lower bound for the target clip duration.
        max_len: Upper bound for the clip duration.
        ideal_len: Accepted for interface compatibility; not used here.
        k: Maximum number of clips. Coerced to ``int`` because UI number
            inputs may deliver floats.
        gap_threshold: Snap tolerance and spacing between clips.
        pad: Margin added before and after each clip.
        ar_mode: Output format; anything but "Original" goes through
            ``apply_aspect_ratio``.
        face_tracking: Forwarded to ``apply_aspect_ratio``.

    Returns:
        Paths of the clips that were produced, in order. Empty when there
        are no segments or ``k`` is not positive.
    """
    k = int(k)
    if not segments or k <= 0:
        # k == 0 previously divided by zero; a float k broke range().
        return []

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    last_end = segments[-1].end
    total_duration = last_end - segments[0].start
    target_duration = min(max_len, max(min_len, total_duration / k))

    outputs = []
    current_start = segments[0].start

    for i in range(k):
        target_end = current_start + target_duration
        # Snap to the first segment boundary close enough to the target.
        best_end = next(
            (seg.end for seg in segments
             if abs(seg.end - target_end) < gap_threshold and seg.end > current_start),
            target_end,
        )
        if best_end - current_start > max_len:
            best_end = current_start + max_len

        start_with_pad = max(0, current_start - pad)
        end_with_pad = best_end + pad

        temp_file = out_dir / f"temp_linear_{i+1}.mp4"
        final_file = out_dir / f"cut_linear_{i+1}.mp4"

        print(f"✂️ Corte {i+1}/{k}: {start_with_pad:.1f}s - {end_with_pad:.1f}s")

        if extract_video_segment(video_file, str(temp_file), start_with_pad, end_with_pad):
            if ar_mode != "Original":
                if apply_aspect_ratio(str(temp_file), str(final_file), ar_mode, face_tracking):
                    outputs.append(str(final_file))
                # Drop the intermediate file whether or not the conversion worked.
                temp_file.unlink(missing_ok=True)
            else:
                # replace() also overwrites a clip left over from an earlier run.
                temp_file.replace(final_file)
                outputs.append(str(final_file))
        else:
            # A failed extraction may leave a partial file behind.
            temp_file.unlink(missing_ok=True)

        current_start = best_end + gap_threshold
        if current_start >= last_end:
            break

    return outputs
360
+
361
def generate_creative_cuts(video_file: str, segments: List[Segment], output_dir: str,
                           min_len: float = 600, max_len: float = 900, ideal_len: float = 900,
                           min_blocks: int = 3, max_blocks: int = 8, k: int = 2,
                           gap_threshold: float = 0.60, pad: float = 0.08,
                           ar_mode: str = "Original", face_tracking: bool = False) -> List[str]:
    """Build up to ``k`` montage clips from evenly spaced transcript segments.

    For each clip a random block count between ``min_blocks`` and
    ``max_blocks`` (capped at the number of segments) is drawn, that many
    evenly spaced segments are extracted, concatenated, and optionally
    converted to ``ar_mode``.

    Args:
        video_file: Source video path.
        segments: Transcript segments (only ``start``/``end`` are read).
        output_dir: Directory for the clips; created if needed.
        min_len, max_len, ideal_len, gap_threshold: Accepted for interface
            compatibility; not used here.
        min_blocks: Minimum blocks per clip; also the minimum number of
            segments required to produce anything.
        max_blocks: Maximum blocks per clip.
        k: Number of clips to attempt.
        pad: Margin added before and after each block.
        ar_mode: Output format; anything but "Original" goes through
            ``apply_aspect_ratio``.
        face_tracking: Forwarded to ``apply_aspect_ratio``.

    Returns:
        Paths of the clips that were produced. Empty when there are too few
        segments or ``k`` is not positive.
    """
    import random

    # UI number inputs may deliver floats; range()/randint() need ints.
    k, min_blocks, max_blocks = int(k), int(min_blocks), int(max_blocks)
    if not segments or len(segments) < min_blocks or k <= 0:
        return []

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    outputs = []

    # Keep the randint() bounds valid: at least one block, and high >= low
    # even when max_blocks was set below min_blocks.
    low = max(1, min_blocks)
    high = max(low, min(max_blocks, len(segments)))

    for i in range(k):
        num_blocks = random.randint(low, high)
        step = max(1, len(segments) // num_blocks)
        selected_segments = segments[::step][:num_blocks]

        block_files = []
        for j, seg in enumerate(selected_segments, start=1):
            block_file = out_dir / f"temp_creative_{i+1}_block_{j}.mp4"
            start = max(0, seg.start - pad)
            end = seg.end + pad

            if extract_video_segment(video_file, str(block_file), start, end):
                block_files.append(str(block_file))
            else:
                # A failed extraction may leave a partial file behind.
                block_file.unlink(missing_ok=True)

        if not block_files:
            continue

        concat_file = out_dir / f"temp_creative_{i+1}_concat.mp4"
        final_file = out_dir / f"cut_creative_{i+1}.mp4"
        try:
            if concatenate_videos(block_files, str(concat_file)):
                if ar_mode != "Original":
                    if apply_aspect_ratio(str(concat_file), str(final_file), ar_mode, face_tracking):
                        outputs.append(str(final_file))
                else:
                    # replace() also overwrites a clip left over from an earlier run.
                    concat_file.replace(final_file)
                    outputs.append(str(final_file))
        finally:
            # Remove intermediates on success and on failure alike.
            concat_file.unlink(missing_ok=True)
            for bf in block_files:
                Path(bf).unlink(missing_ok=True)

    return outputs
407
+
408
+ # ======================= INTERFACE GRADIO =======================
409
+
410
# Root directory for generated clips (relative to the working directory);
# created at import time so the UI handlers can write into it.
SPACE_OUT = Path("outputs")
SPACE_OUT.mkdir(exist_ok=True, parents=True)
412
 
413
def do_transcribe(video_file, model_size):
    """UI handler: transcribe the uploaded video and build a status message.

    Args:
        video_file: Path of the uploaded video, or None/empty when nothing
            was selected.
        model_size: Whisper model name forwarded to ``transcribe``.

    Returns:
        ``(segments, status_text)``. ``segments`` is empty when no video was
        given or transcription failed; ``status_text`` then explains why.
        On success it holds the segment count and a preview of the first
        12 segments.
    """
    if not video_file:
        return [], "Selecione um vídeo."

    try:
        segs = transcribe(video_file, model_size=model_size)
    except Exception as exc:
        # Top-level UI boundary: show the failure in the status box instead
        # of an opaque UI error, and keep it in the server log.
        print(f"❌ Erro na transcrição: {exc!r}")
        return [], f"Erro na transcrição: {exc}"

    preview = "\n".join(f"[{s.start:.1f}–{s.end:.1f}] {s.text}" for s in segs[:12])
    return segs, f"Transcrição ok. Segmentos: {len(segs)}\n\nPrévia:\n{preview}"
419
 
 
421
  if not segs:
422
  return [], "Transcreva antes de cortar."
423
  workdir = SPACE_OUT / (out_subdir or "cortes")
424
+ outs = generate_linear_cuts(video_file, segs, str(workdir), min_len=min_len, max_len=max_len,
425
+ ideal_len=ideal_len, k=k, gap_threshold=gap, pad=pad,
426
+ ar_mode=ar_mode, face_tracking=face_tracking)
427
+ return [str(Path(p)) for p in outs], f"Gerados: {len(outs)} arquivo(s)."
 
 
428
 
429
def run_creative(segs, video_file, out_subdir, min_len, max_len, ideal_len, minb, maxb, k, gap, pad, ar_mode, face_tracking):
    """UI handler: generate creative cuts and report how many were produced.

    ``out_subdir`` comes straight from a text box, so it is reduced to a
    single directory name under ``SPACE_OUT``; an absolute path or ``..``
    can no longer place files outside the output root. Block and clip counts
    are converted to ``int`` because number inputs may deliver floats.

    Returns:
        ``(file_paths, status_text)``; an empty list plus a hint when no
        transcript is available yet.
    """
    if not segs:
        return [], "Transcreva antes de cortar."

    # Keep only the last path component and reject empty or dot names.
    subdir = Path(str(out_subdir or "").strip()).name
    if subdir in ("", ".", ".."):
        subdir = "cortes"
    workdir = SPACE_OUT / subdir

    outs = generate_creative_cuts(video_file, segs, str(workdir), min_len=min_len, max_len=max_len,
                                  ideal_len=ideal_len, min_blocks=int(minb), max_blocks=int(maxb),
                                  k=int(k), gap_threshold=gap, pad=pad, ar_mode=ar_mode,
                                  face_tracking=face_tracking)
    return [str(Path(p)) for p in outs], f"Gerados: {len(outs)} arquivo(s)."
 
438
 
439
# Stylesheet injected into the Gradio page: light palette and neon primary buttons.
css = """
:root {
  --neon: #39FF14;
  --txt: #0a0a0a;
  --muted: #374151;
  --line: #e5e7eb;
  --bg: #ffffff;
}
html, body, .gradio-container { background: var(--bg) !important; color: var(--txt) !important; }
.gradio-container { font-family: 'Manrope', system-ui, sans-serif !important; }
.gradio-container h1 { font-weight: 800 !important; font-size: clamp(28px, 5vw, 46px) !important; }
.gradio-container button.primary {
  background: var(--neon) !important; color: #000 !important; border: none !important;
  border-radius: 10px !important; font-weight: 800 !important; padding: 12px 20px !important;
}
"""

# UI layout: left column = upload + transcription, right column = cut settings.
with gr.Blocks(title="Editor de Cortes Automático", css=css) as demo:
    gr.HTML("""
    <link href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;800&display=swap" rel="stylesheet">
    <div style="text-align: center; padding: 24px 0;">
        <h1>🎬 Editor de Cortes Automático</h1>
        <p style="color: #6b7280;">Gere cortes com rastreamento facial inteligente</p>
    </div>
    """)

    with gr.Row():
        with gr.Column():
            video = gr.Video(label="Vídeo de entrada", interactive=True)
            with gr.Row():
                model_size = gr.Dropdown(["tiny","base","small","medium"], value="small", label="Modelo Whisper")
                out_subdir = gr.Textbox(label="Pasta de saída", value="cortes")
            transcribe_btn = gr.Button("🎙️ 1) Transcrever", variant="primary")
            transcript_preview = gr.Textbox(label="Status", lines=10)

        with gr.Column():
            with gr.Tab("✂️ Cortes Simples"):
                with gr.Row():
                    min_len = gr.Number(value=600, label="Min (s)")
                    max_len = gr.Number(value=900, label="Max (s)")
                with gr.Row():
                    ideal_len = gr.Number(value=900, label="Ideal (s)")
                    # precision=0 makes the component deliver an int; the cut
                    # count is used as a loop bound downstream.
                    k = gr.Number(value=2, label="Quantidade", precision=0)
                with gr.Row():
                    gap = gr.Number(value=0.60, label="Gap")
                    pad = gr.Number(value=0.08, label="Pad")
                ar_mode = gr.Dropdown(["Original","Vertical 9:16","Quadrado 1:1","Retrato 4:5"],
                                      value="Original", label="Formato")
                face_tracking = gr.Checkbox(label="👤 Rastreamento facial", value=True)
                go_linear = gr.Button("🚀 2) Gerar Cortes", variant="primary")
                out_linear = gr.Files(label="Arquivos gerados")
                status_linear = gr.Textbox(label="Status", lines=2)

            with gr.Tab("🎨 Cortes Criativos"):
                with gr.Row():
                    # Block counts and clip count are integers by nature.
                    minb = gr.Number(value=3, label="Blocos min", precision=0)
                    maxb = gr.Number(value=8, label="Blocos max", precision=0)
                with gr.Row():
                    k2 = gr.Number(value=2, label="Quantidade", precision=0)
                    gap2 = gr.Number(value=0.60, label="Gap")
                    pad2 = gr.Number(value=0.08, label="Pad")
                ar_mode2 = gr.Dropdown(["Original","Vertical 9:16","Quadrado 1:1","Retrato 4:5"],
                                       value="Original", label="Formato")
                face_tracking2 = gr.Checkbox(label="👤 Rastreamento facial", value=True)
                go_creative = gr.Button("🎬 3) Gerar Criativos", variant="primary")
                out_creative = gr.Files(label="Arquivos gerados")
                status_creative = gr.Textbox(label="Status", lines=2)

    # Transcript segments are kept per session and shared by both cut modes.
    segs_state = gr.State([])

    transcribe_btn.click(do_transcribe, inputs=[video, model_size], outputs=[segs_state, transcript_preview])
    go_linear.click(run_linear, inputs=[segs_state, video, out_subdir, min_len, max_len, ideal_len, k, gap, pad, ar_mode, face_tracking],
                    outputs=[out_linear, status_linear])
    # The creative mode reuses the length fields of the simple-cuts tab.
    go_creative.click(run_creative, inputs=[segs_state, video, out_subdir, min_len, max_len, ideal_len, minb, maxb, k2, gap2, pad2, ar_mode2, face_tracking2],
                      outputs=[out_creative, status_creative])

if __name__ == "__main__":
    demo.launch()