leicam committed
Commit 5ddf4bc · verified · 1 Parent(s): 21b6c6c

Upload 6 files

Files changed (6)
  1. README.md +25 -13
  2. app.py +92 -0
  3. core.py +443 -0
  4. huggingface.yaml +10 -0
  5. packages.txt +1 -0
  6. requirements (1).txt +9 -0
README.md CHANGED
@@ -1,13 +1,25 @@
- ---
- title: EditorCortes
- emoji: 🚀
- colorFrom: yellow
- colorTo: yellow
- sdk: gradio
- sdk_version: 5.47.2
- app_file: app.py
- pinned: false
- short_description: Edita cortes para qualquer lugar
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+ # Editor de cortes automático — Hugging Face Space
+
+ This is the web (Gradio) version of the **Editor de cortes** desktop app. It reuses the same transcription logic (faster-whisper) and `ffmpeg-python` export pipeline, but without Qt.
+
+ ## How to use on Spaces
+ 1. Create a new Space (type **Gradio**).
+ 2. Upload these files to the root of the Space:
+    - `app.py`
+    - `core.py`
+    - `requirements.txt`
+    - `packages.txt`
+ 3. (Optional) For GPU, set the Space hardware to **T4** or higher in the settings.
+ 4. Click **Restart** / **Deploy**.
+
+ ## Usage
+ - Upload a video (`.mp4`, `.mov`, `.mkv`, `.avi`).
+ - Click **Transcrever**.
+ - Generate **simple** or **creative** cuts with the desired parameters.
+ - The final files appear for download.
+
+ ## Notes
+ - `packages.txt` installs `ffmpeg` in the Space container.
+ - `requirements.txt` includes `gradio`, `faster-whisper`, `sentence-transformers`, `torch`, etc.
+ - For the best `faster-whisper` performance, use a **GPU**.
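For reference, the same pipeline can also be driven without the UI. A minimal sketch, assuming `core.py` is importable and using `input.mp4` and the output folder as placeholder names:

```python
# Hypothetical smoke test: run the core pipeline without Gradio.
from core import transcribe, generate_linear_cuts

segments = transcribe("input.mp4", model_size="small")  # faster-whisper, Portuguese
outputs = generate_linear_cuts(
    "input.mp4", segments, "outputs/cortes",
    min_len=600, max_len=900, ideal_len=900,  # target cuts of 10 to 15 minutes
    k=2, ar_mode="Vertical 9:16",
)
print(outputs)  # paths of the exported .mp4 files
```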
app.py ADDED
@@ -0,0 +1,92 @@
+
+ import gradio as gr
+ from pathlib import Path
+ from core import transcribe, generate_linear_cuts, generate_creative_cuts
+
+ SPACE_OUT = Path("outputs")
+ SPACE_OUT.mkdir(exist_ok=True, parents=True)
+
+ def do_transcribe(video_file, model_size):
+     if video_file is None:
+         return [], "Selecione um vídeo."
+     segs = transcribe(video_file, model_size=model_size)
+     # Show a small preview of the transcript.
+     preview = "\n".join(f"[{s.start:.1f}–{s.end:.1f}] {s.text}" for s in segs[:12])
+     return segs, f"Transcrição ok. Segmentos: {len(segs)}\n\nPrévia:\n{preview}"
+
+ def run_linear(segs, video_file, out_subdir, min_len, max_len, ideal_len, k, gap, pad, ar_mode):
+     if not segs:
+         return [], "Transcreva antes de cortar."
+     workdir = SPACE_OUT / (out_subdir or "cortes")
+     outs = generate_linear_cuts(video_file, segs, str(workdir),
+                                 min_len=min_len, max_len=max_len, ideal_len=ideal_len,
+                                 k=k, gap_threshold=gap, pad=pad, ar_mode=ar_mode)
+     links = [str(Path(p)) for p in outs]
+     return links, f"Gerados: {len(links)} arquivo(s)."
+
+ def run_creative(segs, video_file, out_subdir, min_len, max_len, ideal_len, minb, maxb, k, gap, pad, ar_mode):
+     if not segs:
+         return [], "Transcreva antes de cortar."
+     workdir = SPACE_OUT / (out_subdir or "cortes")
+     outs = generate_creative_cuts(video_file, segs, str(workdir),
+                                   min_len=min_len, max_len=max_len, ideal_len=ideal_len,
+                                   min_blocks=minb, max_blocks=maxb,
+                                   k=k, gap_threshold=gap, pad=pad, ar_mode=ar_mode)
+     links = [str(Path(p)) for p in outs]
+     return links, f"Gerados: {len(links)} arquivo(s)."
+
+ with gr.Blocks(title="Editor de cortes automático — Space") as demo:
+     gr.Markdown("# Editor de cortes automático — Space (Gradio)\nDo desktop para o navegador. Carregue um vídeo, transcreva e gere cortes simples ou criativos.")
+     with gr.Row():
+         with gr.Column(scale=1):
+             video = gr.Video(label="Vídeo de entrada", interactive=True)
+             model_size = gr.Dropdown(choices=["tiny", "base", "small", "medium"], value="small", label="Modelo Whisper")
+             out_subdir = gr.Textbox(label="Subpasta de saída", value="editor_de_cortes_automatico")
+             transcribe_btn = gr.Button("1) Transcrever", variant="primary")
+             transcript_preview = gr.Textbox(label="Status / Prévia", lines=10)
+         with gr.Column(scale=1):
+             with gr.Tab("Cortes simples"):
+                 min_len = gr.Number(value=600, label="Mín (s)")
+                 max_len = gr.Number(value=900, label="Máx (s)")
+                 ideal_len = gr.Number(value=900, label="Ideal (s)")
+                 k = gr.Number(value=2, label="Qtd cortes")
+                 gap = gr.Number(value=0.60, label="Gap (s)")
+                 pad = gr.Number(value=0.08, label="Pad (s)")
+                 ar_mode = gr.Dropdown(choices=["Original", "Vertical 9:16", "Quadrado 1:1", "Retrato 4:5"], value="Original", label="Formato")
+                 go_linear = gr.Button("2) Gerar cortes simples")
+                 out_linear = gr.Files(label="Arquivos gerados (simples)")
+                 status_linear = gr.Textbox(label="Status", lines=2)
+             with gr.Tab("Cortes criativos"):
+                 minb = gr.Number(value=3, label="Blocos mín")
+                 maxb = gr.Number(value=8, label="Blocos máx")
+                 k2 = gr.Number(value=2, label="Qtd cortes")
+                 gap2 = gr.Number(value=0.60, label="Gap (s)")
+                 pad2 = gr.Number(value=0.08, label="Pad (s)")
+                 ar_mode2 = gr.Dropdown(choices=["Original", "Vertical 9:16", "Quadrado 1:1", "Retrato 4:5"], value="Original", label="Formato")
+                 go_creative = gr.Button("3) Gerar cortes criativos")
+                 out_creative = gr.Files(label="Arquivos gerados (criativos)")
+                 status_creative = gr.Textbox(label="Status", lines=2)
+
+     segs_state = gr.State([])
+
+     transcribe_btn.click(
+         do_transcribe,
+         inputs=[video, model_size],
+         outputs=[segs_state, transcript_preview],
+     )
+
+     go_linear.click(
+         run_linear,
+         inputs=[segs_state, video, out_subdir, min_len, max_len, ideal_len, k, gap, pad, ar_mode],
+         outputs=[out_linear, status_linear],
+     )
+
+     # Note: the creative tab reuses the duration bounds (Mín/Máx/Ideal) from the simple tab.
+     go_creative.click(
+         run_creative,
+         inputs=[segs_state, video, out_subdir, min_len, max_len, ideal_len, minb, maxb, k2, gap2, pad2, ar_mode2],
+         outputs=[out_creative, status_creative],
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
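Outside Spaces, the same Blocks app can be launched locally for testing. A minimal sketch, assuming the packages from `requirements.txt` plus a system `ffmpeg` are installed; host and port are arbitrary choices:

```python
# Hypothetical local launch. Importing app does not auto-launch,
# because demo.launch() sits behind the __main__ guard.
from app import demo

demo.launch(server_name="0.0.0.0", server_port=7860)
```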
core.py ADDED
@@ -0,0 +1,443 @@
+
+ from __future__ import annotations
+ import os
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import List, Tuple, Dict, Any
+
+ import numpy as np
+
+ # Third-party deps (kept optional so the module still imports without them)
+ try:
+     import ffmpeg  # ffmpeg-python
+ except Exception:
+     ffmpeg = None
+ try:
+     from faster_whisper import WhisperModel
+ except Exception:
+     WhisperModel = None
+
+ # -------- Data structures --------
+ @dataclass
+ class Segment:
+     start: float
+     end: float
+     text: str
+     conf: float
+
+ @dataclass
+ class ClipCandidate:
+     start: float
+     end: float
+     score: float
+     text: str
+
+ # -------- Heuristics (same as desktop) --------
+ KEYWORDS_HOOK = [
+     "segredo", "ninguém te conta", "o erro", "o truque", "como", "aprendi",
+     "descobri", "vale ouro", "a verdade", "você precisa", "atenção", "não faça", "vou te falar"
+ ]
+ KEYWORDS_PAYOFF = [
+     "portanto", "então", "resultado", "conclusão", "resumo", "é por isso", "fica assim",
+     "no final", "o ponto é", "por fim", "pra encerrar"
+ ]
+
+ def chunk_sentences(segments: List[Segment], max_gap: float = 0.7, max_len: float = 45.0):
+     blocks = []
+     cur = {"start": None, "end": None, "text": []}
+     for s in segments:
+         if cur["start"] is None:
+             cur["start"] = s.start; cur["end"] = s.end; cur["text"] = [s.text]
+         else:
+             gap = s.start - cur["end"]
+             if gap <= max_gap and (s.end - cur["start"]) <= max_len:
+                 cur["end"] = s.end; cur["text"].append(s.text)
+             else:
+                 blocks.append({"start": float(cur["start"]), "end": float(cur["end"]), "text": " ".join(cur["text"]).strip()})
+                 cur = {"start": s.start, "end": s.end, "text": [s.text]}
+     if cur["start"] is not None:
+         blocks.append({"start": float(cur["start"]), "end": float(cur["end"]), "text": " ".join(cur["text"]).strip()})
+     return blocks
+
+ def score_hook(txt: str) -> float:
+     low = txt.lower(); s = 0.0
+     for w in KEYWORDS_HOOK:
+         if w in low: s += 1.0
+     s += 0.3 * low.count("!")
+     if "?" in low: s += 0.5
+     if len(low.split()) <= 22: s += 0.6
+     return s
+
+ def score_payoff(txt: str) -> float:
+     low = txt.lower(); s = 0.0
+     for w in KEYWORDS_PAYOFF:
+         if w in low: s += 1.0
+     return s
+
+ # -------- VideoExport helpers (ffmpeg) --------
+ class VideoExport:
+     @staticmethod
+     def vf_for_mode(ar_mode: str) -> str | None:
+         if not ar_mode or "Original" in ar_mode:
+             return None
+         if "9:16" in ar_mode:
+             return "crop=floor(ih*9/16):ih:(iw-floor(ih*9/16))/2:0,scale=1080:1920"
+         if "1:1" in ar_mode or "Quadrado" in ar_mode:
+             return "crop=min(iw\\,ih):min(iw\\,ih):(iw-min(iw\\,ih))/2:(ih-min(iw\\,ih))/2,scale=1080:1080"
+         if "4:5" in ar_mode or "Retrato" in ar_mode:
+             return "crop=floor(ih*4/5):ih:(iw-floor(ih*4/5))/2:0,scale=1080:1350"
+         return None
+
+     @staticmethod
+     def tighten_parts(segments: List[Segment], start: float, end: float,
+                       gap_threshold: float = 0.6, pad: float = 0.08):
+         segs = [s for s in segments if s.end > start and s.start < end]
+         if not segs:
+             return [(start, max(start + 0.5, end))]
+         trimmed = []
+         for s in segs:
+             s0 = max(start, float(s.start)); s1 = min(end, float(s.end))
+             if s1 > s0: trimmed.append((s0, s1))
+         if not trimmed:
+             return [(start, max(start + 0.5, end))]
+         parts = []
+         cur_s, cur_e = trimmed[0][0] - pad, trimmed[0][1] + pad
+         cur_s = max(start, cur_s)
+         for (a0, a1), (b0, b1) in zip(trimmed[:-1], trimmed[1:]):
+             gap = b0 - a1
+             if gap <= gap_threshold:
+                 cur_e = b1 + pad
+             else:
+                 cur_e = min(end, max(cur_e, cur_s + 0.25))
+                 parts.append((cur_s, cur_e))
+                 cur_s = max(start, b0 - pad)
+                 if parts and cur_s < parts[-1][1]:
+                     cur_s = parts[-1][1] + 0.01
+                 cur_e = b1 + pad
+         cur_e = min(end, max(cur_e, cur_s + 0.25))
+         parts.append((cur_s, cur_e))
+         sane = []
+         for s, e in parts:
+             s = max(start, s); e = min(end, e)
+             if e > s + 0.05: sane.append((s, e))
+         if not sane:
+             sane = [(start, max(start + 0.5, end))]
+         return sane
+
+     @staticmethod
+     def export_part(src: str, start: float, end: float, out_path: str, ar_mode: str = "Original"):
+         if ffmpeg is None:
+             raise RuntimeError("ffmpeg-python não instalado")
+         s = max(0.0, float(start)); e = max(float(end), s + 0.25); dur = max(0.25, e - s)
+         vf = VideoExport.vf_for_mode(ar_mode)
+         inp = ffmpeg.input(src, ss=s)
+         if vf:
+             stream = inp.output(
+                 out_path, t=dur, vcodec="libx264", acodec="aac",
+                 video_bitrate="4000k", audio_bitrate="160k",
+                 vf=vf, movflags="+faststart"
+             )
+         else:
+             stream = inp.output(
+                 out_path, t=dur, vcodec="libx264", acodec="aac",
+                 video_bitrate="4000k", audio_bitrate="160k",
+                 movflags="+faststart"
+             )
+         stream.overwrite_output().global_args("-loglevel", "error").run()
+
+     @staticmethod
+     def concat_parts(part_paths: List[str], out_path: str, reencode_if_needed: bool = False):
+         if ffmpeg is None:
+             raise RuntimeError("ffmpeg-python não instalado")
+         import tempfile
+         with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt", encoding="utf-8", newline="\n") as f:
+             for p in part_paths:
+                 safe = Path(p).resolve().as_posix().replace("'", r"\'")
+                 f.write(f"file '{safe}'\n")
+             list_path = f.name
+         inp = ffmpeg.input(list_path, format="concat", safe=0)
+         out = (inp.output(out_path, vcodec="libx264", acodec="aac", movflags="+faststart")
+                if reencode_if_needed else
+                inp.output(out_path, c="copy", movflags="+faststart"))
+         out.overwrite_output().global_args("-loglevel", "error").run()
+
+     @staticmethod
+     def export_tightened_clip(src: str, segments: List[Segment],
+                               start: float, end: float, out_path: str,
+                               gap_threshold: float = 0.6, pad: float = 0.08,
+                               tmp_dir: str | None = None,
+                               ar_mode: str = "Original"):
+         if ffmpeg is None:
+             raise RuntimeError("ffmpeg-python não instalado")
+         base_tmp = Path(tmp_dir or Path(out_path).parent).resolve()
+         base_tmp.mkdir(parents=True, exist_ok=True)
+         parts = VideoExport.tighten_parts(segments, start, end, gap_threshold=gap_threshold, pad=pad)
+         part_paths = []
+         for i, (s, e) in enumerate(parts, 1):
+             tmp = (base_tmp / f"_tmp_{Path(out_path).stem}_{i:03d}.mp4").resolve().as_posix()
+             VideoExport.export_part(src, s, e, tmp, ar_mode=ar_mode)
+             part_paths.append(tmp)
+         try:
+             VideoExport.concat_parts(part_paths, Path(out_path).resolve().as_posix(), reencode_if_needed=False)
+         except Exception:
+             VideoExport.concat_parts(part_paths, Path(out_path).resolve().as_posix(), reencode_if_needed=True)
+         for p in part_paths:
+             try: os.remove(p)
+             except Exception: pass
+
+ # -------- Transcription --------
+ def transcribe(video_path: str, model_size: str = "small") -> List[Segment]:
+     if WhisperModel is None:
+         raise RuntimeError("faster-whisper não está instalado.")
+     model = WhisperModel(model_size, device="cuda" if _has_cuda() else "cpu")
+     segments, info = model.transcribe(video_path, language="pt", vad_filter=False)
+     result = []
+     # Clamp each segment's avg_logprob into a rough [-1, 1] confidence.
+     for seg in segments:
+         conf = getattr(seg, "avg_logprob", None)
+         if conf is None: conf = -0.5
+         conf = float(np.clip(conf, -1, 1))
+         result.append(Segment(seg.start, seg.end, seg.text.strip(), conf))
+     return result
+
+ def _has_cuda():
+     try:
+         import torch
+         return torch.cuda.is_available()
+     except Exception:
+         return False
+
+ # -------- Linear cuts --------
+ def generate_linear_cuts(src_path: str, segments: List[Segment], out_dir: str,
+                          min_len: int = 600, max_len: int = 900, ideal_len: int = 900,
+                          k: int = 3, gap_threshold: float = 0.6, pad: float = 0.08,
+                          ar_mode: str = "Original") -> list[str]:
+     outdir = Path(out_dir); outdir.mkdir(parents=True, exist_ok=True)
+     clips = _generate_candidates(segments, float(min_len), float(max_len), float(ideal_len))
+     if not clips:
+         return []
+     top = _select_top(clips, k=int(k))
+     outputs = []
+     for idx, clip in enumerate(top, 1):
+         out_path = outdir / f"simples_{idx:02d}.mp4"
+         VideoExport.export_tightened_clip(src_path, segments, clip.start, clip.end, str(out_path),
+                                           gap_threshold=float(gap_threshold), pad=float(pad),
+                                           tmp_dir=str(outdir), ar_mode=ar_mode)
+         outputs.append(str(out_path))
+     return outputs
+
+ def _generate_candidates(segs: List[Segment], min_len: float, max_len: float, ideal_len: float) -> List[ClipCandidate]:
+     clips: List[ClipCandidate] = []
+     n = len(segs)
+     for i in range(n):
+         start = segs[i].start
+         text_parts, confs = [], []
+         end = start
+         for j in range(i, n):
+             end = segs[j].end
+             dur = end - start
+             if dur > max_len: break
+             text_parts.append(segs[j].text); confs.append(segs[j].conf)
+             if dur >= min_len:
+                 t = " ".join(text_parts)
+                 score = _score_text(t) + (float(np.mean(confs)) if confs else 0.0)
+                 gap = segs[j+1].start - segs[j].end if j + 1 < n else 0.0
+                 if gap >= 0.6: score += 0.3
+                 ideal = 1.0 - abs((dur - ideal_len) / max(ideal_len, 1.0))
+                 score += 0.4 * ideal
+                 clips.append(ClipCandidate(start, end, float(score), t))
+     return clips
+
+ def _score_text(t: str) -> float:
+     low = t.lower(); s = 0.0
+     for w in KEYWORDS_HOOK:
+         if w in low: s += 0.6
+     for w in ["resultado", "portanto", "então", "conclus", "resumo"]:
+         if w in low: s += 0.3
+     s += 0.2 * low.count("!")
+     s += 0.1 * sum(ch.isdigit() for ch in low)
+     if low.strip().startswith(("como ", "por que", "o que", "qual ", "você ", "descobri", "aprendi")):
+         s += 0.4
+     return s
+
+ def _select_top(clips: List[ClipCandidate], k: int = 8) -> List[ClipCandidate]:
+     clips = sorted(clips, key=lambda c: c.score, reverse=True)
+     selected: List[ClipCandidate] = []
+     for c in clips:
+         if len(selected) >= k: break
+         if all(_iou_1d((c.start, c.end), (s.start, s.end)) < 0.3 for s in selected):
+             selected.append(c)
+     return selected
+
+ def _iou_1d(a: Tuple[float, float], b: Tuple[float, float]) -> float:
+     s1, e1 = a; s2, e2 = b
+     inter = max(0.0, min(e1, e2) - max(s1, s2))
+     uni = (e1 - s1) + (e2 - s2) - inter
+     return inter / uni if uni > 0 else 0.0
+
+ # -------- Creative cuts (non-linear) --------
+ def generate_creative_cuts(src_path: str, segments: List[Segment], out_dir: str,
+                            min_len: int = 600, max_len: int = 900, ideal_len: int = 900,
+                            min_blocks: int = 3, max_blocks: int = 8,
+                            k: int = 2, gap_threshold: float = 0.6, pad: float = 0.08,
+                            ar_mode: str = "Original") -> list[str]:
+     from sentence_transformers import SentenceTransformer  # lazy: heavy import
+     outdir = Path(out_dir); outdir.mkdir(parents=True, exist_ok=True)
+
+     blocks = chunk_sentences(segments, max_gap=0.7, max_len=45.0)
+     if len(blocks) < int(min_blocks):
+         return []
+
+     embedder = SentenceTransformer("all-MiniLM-L6-v2")
+     emb = embedder.encode([b['text'] for b in blocks], show_progress_bar=False, normalize_embeddings=True)
+     emb = np.asarray(emb, dtype=np.float32)
+
+     hook_scores = np.array([score_hook(b['text']) for b in blocks], dtype=float)
+     payoff_scores = np.array([score_payoff(b['text']) for b in blocks], dtype=float)
+
+     sequences = _assemble_sequences(blocks, emb, hook_scores, payoff_scores,
+                                     min_len=float(min_len), max_len=float(max_len), ideal_len=float(ideal_len),
+                                     max_blocks=int(max_blocks))
+     outputs = []
+     if not sequences:
+         # Fallback: greedy assembly starting from the strongest hook
+         outs = _fallback_beam_greedy(outdir, blocks, emb, hook_scores, segments, src_path,
+                                      gap_threshold=float(gap_threshold), pad=float(pad), ar_mode=ar_mode,
+                                      max_len=float(max_len))
+         return outs
+
+     for idx, seq in enumerate(sequences[: int(k)], 1):
+         out_path = outdir / f"criativo_{idx:02d}.mp4"
+         part_paths = []
+         for j, b in enumerate(seq, 1):
+             s, e = float(b["start"]), float(b["end"])
+             tmp = (outdir / f"_tmp_comp{idx:02d}_{j:03d}.mp4").resolve().as_posix()
+             VideoExport.export_tightened_clip(src_path, segments, s, e, tmp,
+                                               gap_threshold=float(gap_threshold), pad=float(pad),
+                                               tmp_dir=str(outdir), ar_mode=ar_mode)
+             part_paths.append(tmp)
+         try:
+             VideoExport.concat_parts(part_paths, out_path.resolve().as_posix(), reencode_if_needed=False)
+         except Exception:
+             VideoExport.concat_parts(part_paths, out_path.resolve().as_posix(), reencode_if_needed=True)
+         for p in part_paths:
+             try: os.remove(p)
+             except Exception: pass
+         outputs.append(str(out_path))
+     return outputs
+
+ # --- internal helpers for creative ---
+ def _assemble_sequences(blocks: List[Dict[str, Any]], emb: np.ndarray,
+                         hook_scores: np.ndarray, payoff_scores: np.ndarray,
+                         min_len: float, max_len: float, ideal_len: float, max_blocks: int):
+     N = len(blocks)
+     idx_sorted = np.argsort(-hook_scores)
+     top_ganchos = idx_sorted[: max(5, N // 10)]
+     sequences = []
+     W_HOOK, W_SIM, W_PAY, W_IDEAL, W_DIVER = 1.2, 1.0, 0.9, 0.5, 0.2
+
+     for h in top_ganchos:
+         init = ([h], blocks[h]["end"] - blocks[h]["start"], W_HOOK * hook_scores[h])
+         beam = [init]
+         for _ in range(max_blocks - 1):
+             new_beam = []
+             for inds, dur, sc in beam:
+                 unused = [i for i in range(N) if i not in inds]
+                 if not unused: new_beam.append((inds, dur, sc)); continue
+                 last = inds[-1]; last_end = blocks[last]["end"]
+                 v_last = emb[last]
+                 sims = emb[unused] @ v_last
+                 cand_order = np.argsort(-sims)[:20]
+                 for cpos in cand_order:
+                     j = unused[cpos]; b = blocks[j]
+                     jump = abs(b["start"] - last_end)
+                     if b["start"] >= last_end and jump < 30.0:  # anti-linear
+                         continue
+                     d_add = b["end"] - b["start"]; dur2 = dur + d_add
+                     if dur2 > max_len: continue
+                     pen = 0.0
+                     if b["start"] >= last_end and (b["start"] - last_end) <= 20.0:
+                         pen += 0.9
+                     elif b["start"] >= last_end and (b["start"] - last_end) <= 40.0:
+                         pen += 0.4
+                     diversity = 0.0
+                     if len(inds) >= 2:
+                         prev = blocks[inds[-2]]
+                         jump_prev = abs(blocks[last]["start"] - prev["end"])
+                         if abs(jump - jump_prev) > 10.0: diversity = 1.0
+                     gain = W_SIM * float(sims[cpos]) + W_DIVER * diversity - pen
+                     new_beam.append((inds + [j], dur2, sc + gain))
+             new_beam.sort(key=lambda x: x[2], reverse=True)
+             beam = new_beam[:20]
+
+         finished = []
+         for inds, dur, sc in beam:
+             unused = [i for i in range(N) if i not in inds]
+             best_end = (inds, dur, sc)
+             for j in unused:
+                 b = blocks[j]; last = inds[-1]; last_end = blocks[last]["end"]
+                 jump = abs(b["start"] - last_end)
+                 if b["start"] >= last_end and jump < 30.0: continue
+                 d_add = b["end"] - b["start"]
+                 if dur + d_add > max_len: continue
+                 sc2 = sc + W_PAY * payoff_scores[j]
+                 cand = (inds + [j], dur + d_add, sc2)
+                 if cand[1] >= min_len and cand[2] > best_end[2]: best_end = cand
+             dur_eff = best_end[1]
+             ideal = 1.0 - abs((dur_eff - ideal_len) / max(ideal_len, 1.0))
+             finished.append((best_end[0], best_end[1], best_end[2] + W_IDEAL * ideal))
+
+         for inds, dur, sc in sorted(finished, key=lambda x: x[2], reverse=True)[:3]:
+             if dur < min_len or dur > max_len: continue
+             sequences.append([blocks[i] for i in inds])
+
+     uniq, seen = [], set()
+     for seq in sequences:
+         key = tuple((round(b["start"], 1), round(b["end"], 1)) for b in seq)
+         if key in seen: continue
+         seen.add(key); uniq.append(seq)
+     return uniq
+
+ def _fallback_beam_greedy(outdir: Path, blocks: List[dict], emb: np.ndarray, hook_scores: np.ndarray,
+                           segments: List[Segment], src_path: str,
+                           gap_threshold: float, pad: float, ar_mode: str, max_len: float) -> list[str]:
+     outputs = []
+     if len(blocks) == 0: return outputs
+     h = int(np.argmax(hook_scores))
+     used = {h}; seq = [blocks[h]]
+     dur = float(blocks[h]['end'] - blocks[h]['start'])
+     while dur < max_len and len(seq) < 8:
+         last_idx = blocks.index(seq[-1]); last_end = blocks[last_idx]["end"]
+         sims = emb @ emb[last_idx]; order = np.argsort(-sims)
+         picked = None
+         for j in order:
+             if j in used: continue
+             b = blocks[j]
+             if b["start"] >= last_end:
+                 jump = b["start"] - last_end
+                 if jump < 30.0: continue
+             d_add = b["end"] - b["start"]
+             if dur + d_add <= max_len:
+                 picked = j; break
+         if picked is None: break
+         used.add(picked); seq.append(blocks[picked])
+         dur += float(blocks[picked]['end'] - blocks[picked]['start'])
+     if dur < 60:  # require at least one minute to avoid trivially short outputs
+         return outputs
+     out_path = outdir / "Corte criativo.mp4"
+     part_paths = []
+     for j, b in enumerate(seq, 1):
+         s, e = float(b['start']), float(b['end'])
+         tmp = (outdir / f"_tmp_greedy_{j:03d}.mp4").resolve().as_posix()
+         VideoExport.export_tightened_clip(src_path, segments, s, e, tmp,
+                                           gap_threshold=float(gap_threshold), pad=float(pad),
+                                           tmp_dir=str(outdir), ar_mode=ar_mode)
+         part_paths.append(tmp)
+     try:
+         VideoExport.concat_parts(part_paths, out_path.resolve().as_posix(), reencode_if_needed=False)
+     except Exception:
+         VideoExport.concat_parts(part_paths, out_path.resolve().as_posix(), reencode_if_needed=True)
+     for p in part_paths:
+         try: os.remove(p)
+         except Exception: pass
+     outputs.append(str(out_path)); return outputs
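The gap heuristics in `core.py` can be sanity-checked without any video file or ffmpeg install. A minimal sketch on synthetic segments, with timings invented purely for illustration:

```python
# Hypothetical check of the gap-merging heuristics on synthetic data.
from core import Segment, chunk_sentences, VideoExport

segs = [
    Segment(0.0, 2.0, "Primeira frase.", -0.3),
    Segment(2.3, 4.0, "Continua em seguida.", -0.3),   # 0.3 s gap: merged
    Segment(6.0, 8.0, "Novo bloco após pausa.", -0.3), # 2.0 s gap: new block
]
print(chunk_sentences(segs, max_gap=0.7))
# -> two blocks: 0.0–4.0 and 6.0–8.0

print(VideoExport.tighten_parts(segs, start=0.0, end=8.0))
# -> roughly [(0.0, 4.08), (5.92, 8.0)]: speech spans padded by 0.08 s,
#    with the 2 s silence between them dropped
```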
huggingface.yaml ADDED
@@ -0,0 +1,10 @@
+ {
+   "title": "Editor de cortes autom\u00e1tico (Space)",
+   "emoji": "\ud83c\udfac",
+   "colorFrom": "green",
+   "colorTo": "gray",
+   "sdk": "gradio",
+   "sdk_version": "4.44.0",
+   "app_file": "app.py",
+   "pinned": false
+ }
packages.txt ADDED
@@ -0,0 +1 @@
+ ffmpeg
requirements (1).txt ADDED
@@ -0,0 +1,9 @@
+ # Runtime deps (Space)
+ gradio>=4.44.0
+ faster-whisper>=1.0.0
+ ffmpeg-python>=0.2.0
+ numpy>=1.24
+ rich>=13.0
+ sentence-transformers>=3.0.0
+ torch
+ # optional for GPU wheels (let HF pick the right one)