# Hugging Face Spaces page header ("Spaces: Sleeping") - web-extraction residue, not program code.
| """ | |
| V34 | |
| """ | |
| import gradio as gr | |
| import cv2 | |
| import numpy as np | |
| import whisper | |
| import subprocess | |
| from pathlib import Path | |
| from dataclasses import dataclass | |
| from typing import List, Tuple, Optional | |
| import tempfile | |
| import os | |
| import shutil | |
| # ======================= DATACLASSES ======================= | |
@dataclass
class Segment:
    """A transcription segment: start/end timestamps plus the spoken text.

    Declared as a dataclass so it can be built with keyword arguments
    (``Segment(start=..., end=..., text=...)``), which is how ``transcribe``
    constructs it.
    """

    start: float
    end: float
    text: str

    def __repr__(self):
        # Only the first 50 characters of the text are shown to keep logs short.
        return f"Segment({self.start:.1f}-{self.end:.1f}: {self.text[:50]}...)"
@dataclass
class FaceBox:
    """A single face detection: bounding box, its centre point and a confidence.

    Declared as a dataclass so it can be built positionally
    (``FaceBox(x, y, w, h, center_x, center_y)``), which is how
    ``FaceTracker.detect_faces`` constructs it.
    """

    x: int
    y: int
    w: int
    h: int
    center_x: int
    center_y: int
    confidence: float = 1.0
| # ======================= FACE TRACKING ======================= | |
class FaceTracker:
    """Haar-cascade face detector used to position a crop window on the main face.

    Attributes:
        face_cascade: The loaded ``cv2.CascadeClassifier``, or ``None`` when no
            classifier could be created.
        enabled: ``True`` only when a non-empty cascade was loaded. When ``False``
            no faces are ever reported and crops fall back to the frame centre.
    """

    def __init__(self):
        # Cascade files tried in order; the first one that loads non-empty wins.
        cascade_names = (
            'haarcascade_frontalface_default.xml',
            'haarcascade_frontalface_alt.xml',
        )
        self.face_cascade = None
        for name in cascade_names:
            try:
                # cv2.data is looked up inside the try so that a build without
                # the bundled data directory degrades to the centred crop.
                self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + name)
            except Exception as exc:
                # Best effort: report the problem and try the next cascade.
                print(f"⚠️ Falha ao carregar {name}: {exc}")
                continue
            if not self.face_cascade.empty():
                break
        self.enabled = self.face_cascade is not None and not self.face_cascade.empty()
        if self.enabled:
            print("✅ Detector de rostos carregado")
        else:
            print("⚠️ Detector de rostos não disponível - usando crop centralizado")

    def detect_faces(self, frame: np.ndarray) -> List["FaceBox"]:
        """Return every face found in ``frame`` (empty when the detector is disabled).

        The frame is converted with ``cv2.COLOR_BGR2GRAY`` before detection.
        """
        if not self.enabled:
            return []
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        detections = self.face_cascade.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5,
            minSize=(30, 30), flags=cv2.CASCADE_SCALE_IMAGE
        )
        face_boxes = []
        for (x, y, w, h) in detections:
            # Plain Python ints keep downstream tuples/prints free of numpy scalars.
            x, y, w, h = int(x), int(y), int(w), int(h)
            face_boxes.append(FaceBox(x, y, w, h, x + w // 2, y + h // 2))
        return face_boxes

    def get_primary_face(self, faces: List["FaceBox"], frame_width: int,
                         frame_height: int) -> Optional["FaceBox"]:
        """Pick the most relevant face, or ``None`` when ``faces`` is empty.

        A single face is returned as is. Otherwise each face is scored as
        30% relative area + 70% closeness to the frame centre and the highest
        score wins (the earliest face wins ties).
        """
        if not faces:
            return None
        if len(faces) == 1:
            return faces[0]

        frame_center_x = frame_width / 2
        frame_center_y = frame_height / 2

        def score(face):
            size_score = (face.w * face.h) / (frame_width * frame_height)
            dx = abs(face.center_x - frame_center_x) / frame_width
            dy = abs(face.center_y - frame_center_y) / frame_height
            center_score = 1 - (dx + dy) / 2
            return (size_score * 0.3) + (center_score * 0.7)

        return max(faces, key=score)

    def calculate_smart_crop(self, frame: np.ndarray, target_width: int,
                             target_height: int) -> Tuple[int, int, int, int]:
        """Compute an ``(x, y, w, h)`` crop of ``frame`` with the target aspect ratio.

        The crop always spans the full frame along one axis. Along the other
        axis it is centred on the primary face (clamped to the frame), or on
        the frame centre when no face is available.
        """
        frame_h, frame_w = frame.shape[:2]
        primary_face = self.get_primary_face(self.detect_faces(frame), frame_w, frame_h)
        target_ar = target_width / target_height
        frame_ar = frame_w / frame_h

        if target_ar < frame_ar:
            # Target is narrower than the frame: keep full height, slide horizontally.
            crop_w = int(frame_h * target_ar)
            crop_h = frame_h
            crop_y = 0
            if primary_face is not None:
                crop_x = max(0, min(primary_face.center_x - crop_w // 2, frame_w - crop_w))
            else:
                crop_x = (frame_w - crop_w) // 2
        else:
            # Target is as wide or wider: keep full width, slide vertically.
            crop_w = frame_w
            crop_h = int(frame_w / target_ar)
            crop_x = 0
            if primary_face is not None:
                # Shift the window up by 10% of its height relative to the face centre.
                offset = int(crop_h * 0.1)
                crop_y = max(0, min(primary_face.center_y - crop_h // 2 - offset,
                                    frame_h - crop_h))
            else:
                crop_y = (frame_h - crop_h) // 2
        return (int(crop_x), int(crop_y), int(crop_w), int(crop_h))
| # ======================= TRANSCRIÇÃO ======================= | |
def transcribe(video_file: str, model_size: str = "small") -> List[Segment]:
    """Transcribe ``video_file`` with Whisper and return its segments.

    Args:
        video_file: Path of the media file handed to Whisper.
        model_size: Whisper model name passed to ``whisper.load_model``.

    Returns:
        One ``Segment`` per entry of the ``"segments"`` list in Whisper's
        result, with the text stripped of surrounding whitespace.
    """
    print(f"🎙️ Carregando modelo Whisper: {model_size}")
    whisper_model = whisper.load_model(model_size)

    print(f"🎬 Transcrevendo: {video_file}")
    # The transcription language is fixed to Portuguese.
    raw = whisper_model.transcribe(video_file, language="pt", verbose=False)

    segments = [
        Segment(start=item["start"], end=item["end"], text=item["text"].strip())
        for item in raw["segments"]
    ]
    print(f"✅ Transcrição completa: {len(segments)} segmentos")
    return segments
| # ======================= PROCESSAMENTO DE VÍDEO ======================= | |
def extract_video_segment(input_video: str, output_video: str, start_time: float, end_time: float) -> bool:
    """Cut ``[start_time, end_time]`` out of ``input_video`` into ``output_video``.

    The segment is re-encoded with ffmpeg (libx264 video, AAC audio).

    Args:
        input_video: Source file path.
        output_video: Destination file path (overwritten, ffmpeg ``-y``).
        start_time: Start of the cut, passed to ffmpeg ``-ss``.
        end_time: End of the cut; the duration ``end_time - start_time`` is
            passed to ffmpeg ``-t``.

    Returns:
        ``True`` when ffmpeg exits successfully, ``False`` when the interval is
        empty, ffmpeg fails, or ffmpeg cannot be started.
    """
    duration = end_time - start_time
    if duration <= 0:
        # An empty or inverted interval can never produce a usable clip.
        print(f"❌ Erro ao extrair: intervalo inválido ({start_time} - {end_time})")
        return False

    cmd = [
        "ffmpeg", "-y", "-ss", str(start_time), "-i", input_video,
        "-t", str(duration), "-c:v", "libx264", "-c:a", "aac",
        "-strict", "experimental", output_video
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ Erro ao extrair: {e}")
        # The end of stderr usually holds ffmpeg's actual error message.
        stderr_tail = (e.stderr or b"").decode("utf-8", errors="replace").strip()[-500:]
        if stderr_tail:
            print(stderr_tail)
        return False
    except OSError as e:
        # Raised when the ffmpeg executable is missing or cannot be run.
        print(f"❌ Erro ao extrair: {e}")
        return False
    return True
def apply_smart_crop_to_video(input_path: str, output_path: str, target_width: int,
                              target_height: int, sample_frames: int = 10) -> bool:
    """Crop a video to the target size using one face-aware crop window.

    Up to ``sample_frames`` evenly spaced frames are analysed with
    ``FaceTracker.calculate_smart_crop``. The median x/y of those crops is
    applied to every frame, and each cropped frame is resized to
    ``target_width`` x ``target_height``. If no frame can be sampled, a
    centred crop is used.

    NOTE(review): frames are written through ``cv2.VideoWriter``, which is only
    given image frames here, so the output presumably has no audio track -
    confirm whether audio must be muxed back by the caller.

    Returns:
        ``True`` on success; ``False`` when the input cannot be opened, reports
        invalid dimensions, or the output writer cannot be created.
    """
    tracker = FaceTracker()
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"❌ Erro ao abrir: {input_path}")
        return False

    out = None
    try:
        # Keep the fractional frame rate (e.g. 29.97): truncating it changes the
        # clip duration. ``not fps > 0`` also rejects NaN and zero.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        if frame_w <= 0 or frame_h <= 0:
            print(f"❌ Erro ao abrir: {input_path}")
            return False

        # Sample crop positions across the clip to smooth out single-frame noise.
        sample_positions = []
        num_samples = max(0, min(sample_frames, frame_count))
        for idx in np.linspace(0, frame_count - 1, num_samples, dtype=int):
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if ret:
                sample_positions.append(
                    tracker.calculate_smart_crop(frame, target_width, target_height)
                )

        if sample_positions:
            # The median is robust against a few frames with a misdetected face.
            avg_x = int(np.median([p[0] for p in sample_positions]))
            avg_y = int(np.median([p[1] for p in sample_positions]))
            crop_w = int(sample_positions[0][2])
            crop_h = int(sample_positions[0][3])
            final_crop = (avg_x, avg_y, crop_w, crop_h)
        else:
            # Fallback: centred crop with the target aspect ratio.
            target_ar = target_width / target_height
            frame_ar = frame_w / frame_h
            if target_ar < frame_ar:
                crop_w = int(frame_h * target_ar)
                crop_h = frame_h
                final_crop = ((frame_w - crop_w) // 2, 0, crop_w, crop_h)
            else:
                crop_w = frame_w
                crop_h = int(frame_w / target_ar)
                final_crop = (0, (frame_h - crop_h) // 2, crop_w, crop_h)

        # Rewind after sampling so the full pass starts from the first frame.
        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (target_width, target_height))
        if not out.isOpened():
            print(f"❌ Erro ao criar saída: {output_path}")
            return False

        print(f"🎬 Processando com crop: {final_crop}")
        x, y, w, h = final_crop
        frame_num = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            cropped = frame[y:y+h, x:x+w]
            resized = cv2.resize(cropped, (target_width, target_height),
                                 interpolation=cv2.INTER_LANCZOS4)
            out.write(resized)
            frame_num += 1
            # Some containers report a frame count of 0; skip the percentage then.
            if frame_num % 30 == 0 and frame_count > 0:
                progress = (frame_num / frame_count) * 100
                print(f" {progress:.1f}% ({frame_num}/{frame_count})")
    finally:
        # Release the native handles even if processing raised.
        cap.release()
        if out is not None:
            out.release()

    print(f"✅ Concluído: {output_path}")
    return True
def apply_aspect_ratio(input_video: str, output_video: str, ar_mode: str, face_tracking: bool = False) -> bool:
    """Produce ``output_video`` from ``input_video`` in the requested format.

    Args:
        input_video: Source file path.
        output_video: Destination file path.
        ar_mode: ``"Original"`` (plain copy), ``"Vertical 9:16"``,
            ``"Quadrado 1:1"`` or ``"Retrato 4:5"``.
        face_tracking: When ``True`` the crop is delegated to
            ``apply_smart_crop_to_video``; otherwise ffmpeg scales and
            centre-crops.

    Returns:
        ``True`` on success; ``False`` for an unknown mode or any copy/ffmpeg
        failure.
    """
    if ar_mode == "Original":
        try:
            shutil.copy(input_video, output_video)
        except OSError as e:
            # Covers a missing source, permission problems and same-file copies.
            print(f"❌ Erro ao copiar: {e}")
            return False
        return True

    ar_dims = {
        "Vertical 9:16": (1080, 1920),
        "Quadrado 1:1": (1080, 1080),
        "Retrato 4:5": (1080, 1350),
    }
    if ar_mode not in ar_dims:
        print(f"❌ Formato desconhecido: {ar_mode}")
        return False
    width, height = ar_dims[ar_mode]

    if face_tracking:
        return apply_smart_crop_to_video(input_video, output_video, width, height)

    # Traditional centred crop: scale up to cover the target, then crop the excess.
    cmd = [
        "ffmpeg", "-y", "-i", input_video,
        "-vf", f"scale={width}:{height}:force_original_aspect_ratio=increase,crop={width}:{height}",
        "-c:a", "copy", output_video
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing ffmpeg executable.
        print(f"❌ Erro ao ajustar formato: {e}")
        return False
    return True
def concatenate_videos(video_files: List[str], output_file: str) -> bool:
    """Join ``video_files`` into ``output_file`` with ffmpeg's concat demuxer.

    Streams are copied (``-c copy``), not re-encoded. A temporary list file is
    written for ffmpeg and always removed afterwards.

    Returns:
        ``True`` when ffmpeg succeeds; ``False`` for an empty input list, an
        ffmpeg failure, or when ffmpeg cannot be started.
    """
    if not video_files:
        return False

    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False,
                                     encoding='utf-8') as f:
        list_file = f.name
        for vf in video_files:
            # Inside a single-quoted concat entry a literal quote is written '\''.
            escaped = os.path.abspath(vf).replace("'", "'\\''")
            f.write(f"file '{escaped}'\n")

    try:
        cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", list_file, "-c", "copy", output_file]
        subprocess.run(cmd, check=True, capture_output=True)
        return True
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing ffmpeg executable.
        print(f"❌ Erro ao concatenar: {e}")
        return False
    finally:
        Path(list_file).unlink(missing_ok=True)
| # ======================= LEGENDAS CRIATIVAS ======================= | |
def highlight_keywords(text: str) -> List[Tuple[str, bool]]:
    """Flag the words of ``text`` that should be visually highlighted.

    Words are compared case-insensitively after stripping surrounding
    punctuation. A word is highlighted when it equals a keyword, or when it is
    part of a multi-word keyword phrase (e.g. "tem que") appearing as
    consecutive words. Matching is on whole words, so "mas" does not light up
    "temas" and "mil" does not light up "similares".

    Returns:
        A list of ``(word, is_highlighted)`` pairs, one per whitespace-separated
        word, with the original word text preserved.
    """
    keywords = [
        # Action / imperative
        "tem que", "precisa", "deve", "faça", "veja", "olha", "escuta",
        # Negation / contrast
        "não", "nunca", "jamais", "mas", "porém", "entretanto",
        # Impact
        "problema", "solução", "segredo", "verdade", "realidade",
        # Numbers
        "milhão", "mil", "bilhão", "100%", "zero",
        # Emotion
        "incrível", "impossível", "fácil", "difícil", "importante",
        # Mental action
        "imagina", "pensa", "considera", "decide", "escolhe"
    ]
    single_words = {k for k in keywords if " " not in k}
    phrases = [tuple(k.split()) for k in keywords if " " in k]

    words = text.split()
    normalized = [w.lower().strip(".,!?;:\"'()…") for w in words]
    flags = [w in single_words for w in normalized]

    # Multi-word keywords can only match a run of consecutive words.
    for phrase in phrases:
        size = len(phrase)
        for start in range(len(normalized) - size + 1):
            if tuple(normalized[start:start + size]) == phrase:
                for pos in range(start, start + size):
                    flags[pos] = True

    return list(zip(words, flags))
def create_subtitle_clip(text: str, start: float, end: float,
                         video_width: int, video_height: int,
                         style: str = "hormozi") -> str:
    """Write a one-line-of-dialogue ASS (Advanced SubStation Alpha) subtitle file.

    The text is wrapped at roughly 40 characters per line, limited to two
    lines, and words flagged by ``highlight_keywords`` are upper-cased and
    wrapped in an ASS override tag.

    Args:
        text: Subtitle text.
        start: Start time in seconds.
        end: End time in seconds.
        video_width: Written as ``PlayResX``.
        video_height: Written as ``PlayResY``.
        style: Style name. Only ``"hormozi"`` is defined; any other value falls
            back to it instead of failing.

    Returns:
        Path of the created temporary ``.ass`` file. The caller owns the file
        and is responsible for deleting it.
    """
    hormozi_style = "\n".join([
        "[V4+ Styles]",
        "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
        "Style: Default,Montserrat,72,&H00FFFF,&H00FFFF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,2,2,10,10,80,1",
        "Style: Highlight,Montserrat,78,&H0000FFFF,&H0000FFFF,&H00000000,&H80000000,-1,0,0,0,110,110,0,0,1,4,3,2,10,10,80,1",
    ])
    style_defs = {"hormozi": hormozi_style}
    # Unknown styles use the default look rather than leaving style_def unbound.
    style_def = style_defs.get(style, hormozi_style)

    def format_time(seconds):
        """Format seconds as an ASS timestamp (H:MM:SS.cc)."""
        # Work in whole centiseconds so 59.999 becomes 1:00.00, never "60.00".
        centis = max(0, int(round(seconds * 100)))
        hours, rest = divmod(centis, 360000)
        minutes, rest = divmod(rest, 6000)
        secs, cs = divmod(rest, 100)
        return f"{hours}:{minutes:02d}:{secs:02d}.{cs:02d}"

    # Greedy wrap: at most 40 characters per line (each word counts len + 1).
    lines = []
    current_line = []
    current_length = 0
    for word, is_highlight in highlight_keywords(text):
        word_len = len(word) + 1
        if current_length + word_len > 40 and current_line:
            lines.append(current_line)
            current_line = [(word, is_highlight)]
            current_length = word_len
        else:
            current_line.append((word, is_highlight))
            current_length += word_len
    if current_line:
        lines.append(current_line)

    # Keep at most two lines; any remaining text is dropped.
    lines = lines[:2]

    formatted_lines = []
    for line in lines:
        rendered = []
        for word, is_highlight in line:
            if is_highlight:
                # Highlight: colour override, larger font, bold, upper case.
                rendered.append(f"{{\\1c&H00FFFF&\\fs78\\b1}}{word.upper()}{{\\r}}")
            else:
                rendered.append(word)
        formatted_lines.append(" ".join(rendered))
    final_text = "\\N".join(formatted_lines)  # \N is the ASS line break

    header = "\n".join([
        "[Script Info]",
        "Title: Viral Subtitles",
        "ScriptType: v4.00+",
        "WrapStyle: 0",
        "PlayResX: " + str(video_width),
        "PlayResY: " + str(video_height),
        "ScaledBorderAndShadow: yes",
    ]) + "\n"
    events = "\n".join([
        "[Events]",
        "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
    ]) + "\n"

    fd, ass_path = tempfile.mkstemp(suffix=".ass")
    with os.fdopen(fd, 'w', encoding='utf-8') as f:
        f.write(header)
        f.write(style_def + "\n\n")
        f.write(events)
        f.write(f"Dialogue: 0,{format_time(start)},{format_time(end)},Default,,0,0,0,,{final_text}\n")
    return ass_path
def add_subtitles_to_video(input_video: str, output_video: str,
                           segments: List[Segment], style: str = "hormozi") -> bool:
    """Burn styled subtitles into a video using ffmpeg's ``ass`` filter.

    One ASS dialogue event is written per segment. Each segment is wrapped at
    roughly 40 visible characters per line, limited to two lines, and words
    flagged by ``highlight_keywords`` are upper-cased and tagged. The video is
    re-encoded with libx264 and the audio is copied.

    Args:
        input_video: Source file path.
        output_video: Destination file path (overwritten).
        segments: Segments whose ``start``/``end``/``text`` become dialogue
            events. Times are written as given, so they must be relative to
            ``input_video``.
        style: Only used in the log line.

    Returns:
        ``True`` when ffmpeg succeeds; ``False`` when the input cannot be read
        or ffmpeg fails or cannot be started.
    """
    def fmt_time(seconds):
        """Format seconds as an ASS timestamp (H:MM:SS.cc)."""
        # Work in whole centiseconds so 59.999 becomes 1:00.00, never "60.00".
        centis = max(0, int(round(seconds * 100)))
        hours, rest = divmod(centis, 360000)
        minutes, rest = divmod(rest, 6000)
        secs, cs = divmod(rest, 100)
        return f"{hours}:{minutes:02d}:{secs:02d}.{cs:02d}"

    # Read the video dimensions for PlayResX / PlayResY.
    cap = cv2.VideoCapture(input_video)
    try:
        opened = cap.isOpened()
        video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        cap.release()
    if not opened or video_width <= 0 or video_height <= 0:
        print(f"[legendas] ❌ Erro: não foi possível abrir {input_video}")
        return False

    header = "\n".join([
        "[Script Info]",
        "Title: Viral Subtitles",
        "ScriptType: v4.00+",
        "WrapStyle: 0",
        f"PlayResX: {video_width}",
        f"PlayResY: {video_height}",
        "ScaledBorderAndShadow: yes",
        "[V4+ Styles]",
        "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
        "Style: Default,Montserrat,68,&H00FFFF00,&H00FFFF00,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,2,2,10,10,60,1",
        "[Events]",
        "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
    ]) + "\n"

    fd, ass_path = tempfile.mkstemp(suffix=".ass")
    try:
        # One ASS file holding every segment.
        with os.fdopen(fd, 'w', encoding='utf-8') as f:
            f.write(header)
            for seg in segments:
                lines = []
                current = []
                length = 0
                for word, is_highlight in highlight_keywords(seg.text):
                    shown = word.upper() if is_highlight else word
                    # Only visible characters count towards the 40-char limit,
                    # never the override tags.
                    w_len = len(shown) + 1
                    token = f"{{\\1c&H0000FFFF&\\fs76\\b1}}{shown}{{\\r}}" if is_highlight else shown
                    if length + w_len > 40 and current:
                        lines.append(" ".join(current))
                        current = [token]
                        length = w_len
                    else:
                        current.append(token)
                        length += w_len
                if current:
                    lines.append(" ".join(current))
                final_text = "\\N".join(lines[:2])  # at most 2 lines
                f.write(f"Dialogue: 0,{fmt_time(seg.start)},{fmt_time(seg.end)},Default,,0,0,0,,{final_text}\n")

        print(f"[legendas] Aplicando estilo {style}...")
        # Filter arguments treat ':' as a separator and '\' as an escape, so the
        # path is normalised to forward slashes with escaped colons.
        ass_path_escaped = ass_path.replace('\\', '/').replace(':', '\\:')
        cmd = [
            "ffmpeg", "-y",
            "-i", input_video,
            "-vf", f"ass={ass_path_escaped}",
            "-c:v", "libx264",
            "-preset", "medium",
            "-crf", "18",
            "-c:a", "copy",
            "-movflags", "+faststart",
            output_video
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing ffmpeg executable.
            print(f"[legendas] ❌ Erro: {e}")
            return False
        print(f"[legendas] ✅ Concluído: {output_video}")
        return True
    finally:
        # Best-effort cleanup of the temporary subtitle file.
        try:
            Path(ass_path).unlink(missing_ok=True)
        except OSError:
            pass
def score_segment_virality(seg: "Segment", idx: int, total: int) -> float:
    """Score a segment's viral potential with additive text/timing heuristics.

    Args:
        seg: Segment providing ``text``, ``start`` and ``end``.
        idx: Position of the segment in the transcript.
        total: Number of segments in the transcript.

    Returns:
        The sum of all heuristic bonuses (higher means more promising).
    """
    lowered = seg.text.lower()

    def mentions(*needles):
        # Substring test against the lower-cased segment text.
        return any(needle in lowered for needle in needles)

    points = 0.0

    # Hooks: questions and direct address.
    if mentions("?", "por que", "qual", "como", "você"):
        points += 15

    # Impact phrases: each phrase found adds its own bonus.
    impact_phrases = (
        "não dá", "problema", "esse é o", "imaginou", "é só",
        "mas", "porém", "entretanto", "então", "olha",
        "escuta", "presta atenção", "isso", "agora",
    )
    points += 8 * sum(1 for phrase in impact_phrases if phrase in lowered)

    # Negation and contrast.
    if mentions("não", "nunca", "jamais", "sem"):
        points += 5

    # Action / imperative wording.
    if mentions("tem que", "precisa", "deve", "faça", "veja"):
        points += 7

    # Any digit in the text.
    if any(ch.isdigit() for ch in lowered):
        points += 6

    # Duration: 15-45s scores best, 10-60s gets a smaller bonus.
    length = seg.end - seg.start
    if 15 <= length <= 45:
        points += 20
    elif 10 <= length <= 60:
        points += 10

    # Position: segments away from the very start/end of the video.
    if 0.2 <= idx / max(1, total) <= 0.8:
        points += 10

    # Completeness: text that ends on punctuation or a closing filler word.
    if lowered.strip().endswith((".", "!", "?", "né", "tá")):
        points += 8

    return points
def find_viral_moments(segments: List["Segment"], k: int = 5) -> List[Tuple[int, int, float]]:
    """Find up to ``k`` non-overlapping windows with the highest viral score.

    Windows of 1 to 5 consecutive segments lasting between 10 and 60 seconds
    are scored as the sum of ``score_segment_virality`` over their segments.
    A window gets +15 when its text contains both a question mark and one of
    "porque", "então", "mas" or "porém". Windows are then picked greedily from
    best to worst, skipping any that shares a segment with one already picked.

    Args:
        segments: Transcript segments in chronological order.
        k: Maximum number of windows to return; values <= 0 yield no windows.

    Returns:
        A list of ``(start_idx, end_idx, score)`` tuples with inclusive
        segment indices, ordered by descending score.
    """
    if k <= 0:
        return []

    total = len(segments)
    candidates = []
    # Sliding windows of several sizes (number of consecutive segments).
    for window_size in (1, 2, 3, 4, 5):
        for first in range(total - window_size + 1):
            window = segments[first:first + window_size]
            span = window[-1].end - window[0].start
            # Skip windows that are too short or too long.
            if span < 10 or span > 60:
                continue
            window_score = sum(
                score_segment_virality(seg, first + offset, total)
                for offset, seg in enumerate(window)
            )
            # Bonus: a question plus a connective is treated as a full narrative.
            combined_text = " ".join(s.text for s in window)
            if "?" in combined_text and any(
                w in combined_text.lower() for w in ["porque", "então", "mas", "porém"]
            ):
                window_score += 15
            candidates.append((first, first + window_size - 1, window_score))

    # Stable sort: equally scored windows keep their generation order.
    candidates.sort(key=lambda c: c[2], reverse=True)

    selected = []
    used_indices = set()
    for start_idx, end_idx, score in candidates:
        covered = range(start_idx, end_idx + 1)
        if used_indices.isdisjoint(covered):
            selected.append((start_idx, end_idx, score))
            used_indices.update(covered)
            if len(selected) >= k:
                break
    return selected
| # ======================= GERAÇÃO DE CORTES ======================= | |
def generate_linear_cuts(video_file: str, segments: List["Segment"], output_dir: str,
                         min_len: float = 600, max_len: float = 900, ideal_len: float = 900,
                         k: int = 2, gap_threshold: float = 0.60, pad: float = 0.08,
                         ar_mode: str = "Original", face_tracking: bool = False,
                         add_subtitles: bool = False) -> List[str]:
    """Cut the video into up to ``k`` consecutive clips.

    Each clip targets ``total_duration / k`` seconds clamped to
    ``[min_len, max_len]``. Its end snaps to the first segment end within
    ``gap_threshold`` seconds of the target, when one exists. Clips are
    written to ``output_dir`` as ``cut_linear_<n>.mp4``, optionally reframed
    via ``apply_aspect_ratio``.

    Args:
        video_file: Source video path.
        segments: Transcript segments in chronological order.
        output_dir: Directory for the generated files (created if needed).
        min_len: Minimum target clip length in seconds.
        max_len: Maximum clip length in seconds.
        ideal_len: Accepted for interface compatibility; not used here.
        k: Number of clips; coerced to ``int`` because UI number fields may
            deliver floats. Values <= 0 produce no clips.
        gap_threshold: Snap tolerance, also the gap left between clips.
        pad: Seconds of padding added before and after each clip.
        ar_mode: Output format name understood by ``apply_aspect_ratio``.
        face_tracking: Forwarded to ``apply_aspect_ratio``.
        add_subtitles: Accepted for interface compatibility; not used here.

    Returns:
        Paths of the clips that were successfully produced.
    """
    k = int(k or 0)
    if not segments or k <= 0:
        return []

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    total_duration = segments[-1].end - segments[0].start
    target_duration = min(max_len, max(min_len, total_duration / k))
    last_end = segments[-1].end

    outputs = []
    current_start = segments[0].start
    for i in range(k):
        target_end = current_start + target_duration
        # Prefer ending on a segment boundary close to the target.
        best_end = next(
            (seg.end for seg in segments
             if abs(seg.end - target_end) < gap_threshold and seg.end > current_start),
            target_end,
        )
        if best_end - current_start > max_len:
            best_end = current_start + max_len

        start_with_pad = max(0, current_start - pad)
        end_with_pad = best_end + pad
        temp_file = out_dir / f"temp_linear_{i+1}.mp4"
        final_file = out_dir / f"cut_linear_{i+1}.mp4"
        print(f"✂️ Corte {i+1}/{k}: {start_with_pad:.1f}s - {end_with_pad:.1f}s")

        if extract_video_segment(video_file, str(temp_file), start_with_pad, end_with_pad):
            if ar_mode != "Original":
                reframed = apply_aspect_ratio(str(temp_file), str(final_file), ar_mode, face_tracking)
                # The intermediate file is removed whether or not reframing worked.
                temp_file.unlink(missing_ok=True)
                if reframed:
                    outputs.append(str(final_file))
            else:
                # replace() also overwrites a clip left over from an earlier run.
                temp_file.replace(final_file)
                outputs.append(str(final_file))
        else:
            # Drop any partial file left behind by a failed extraction.
            temp_file.unlink(missing_ok=True)

        current_start = best_end + gap_threshold
        if current_start >= last_end:
            break
    return outputs
def generate_creative_cuts(video_file: str, segments: List["Segment"], output_dir: str,
                           min_len: float = 600, max_len: float = 900, ideal_len: float = 900,
                           min_blocks: int = 3, max_blocks: int = 8, k: int = 2,
                           gap_threshold: float = 0.60, pad: float = 0.08,
                           ar_mode: str = "Original", face_tracking: bool = False,
                           add_subtitles: bool = False) -> List[str]:
    """Build up to ``k`` clips, each stitched from evenly spaced segments.

    For every clip a random block count between ``min_blocks`` and
    ``max_blocks`` (capped at the number of segments) is drawn. Segments are
    picked at a regular stride, extracted individually, concatenated and
    optionally reframed via ``apply_aspect_ratio``. Results are written to
    ``output_dir`` as ``cut_creative_<n>.mp4``.

    Args:
        video_file: Source video path.
        segments: Transcript segments in chronological order.
        output_dir: Directory for the generated files (created if needed).
        min_len, max_len, ideal_len, gap_threshold, add_subtitles: Accepted for
            interface compatibility; not used here.
        min_blocks: Minimum number of blocks per clip (at least 1 is used).
        max_blocks: Maximum number of blocks per clip.
        k: Number of clips. Counts are coerced to ``int`` because UI number
            fields may deliver floats.
        pad: Seconds of padding added before and after each block.
        ar_mode: Output format name understood by ``apply_aspect_ratio``.
        face_tracking: Forwarded to ``apply_aspect_ratio``.

    Returns:
        Paths of the clips that were successfully produced.
    """
    import random

    k = int(k or 0)
    min_blocks = int(min_blocks or 0)
    max_blocks = int(max_blocks or 0)
    if not segments or len(segments) < min_blocks or k <= 0:
        return []

    # Keep randint's bounds valid: at least one block, and upper >= lower.
    lower = max(1, min_blocks)
    upper = max(lower, min(max_blocks, len(segments)))

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    outputs = []
    for i in range(k):
        num_blocks = random.randint(lower, upper)
        step = max(1, len(segments) // num_blocks)
        selected_segments = [
            segments[idx]
            for idx in (j * step for j in range(num_blocks))
            if idx < len(segments)
        ]

        block_files = []
        for j, seg in enumerate(selected_segments):
            block_file = out_dir / f"temp_creative_{i+1}_block_{j+1}.mp4"
            start = max(0, seg.start - pad)
            end = seg.end + pad
            if extract_video_segment(video_file, str(block_file), start, end):
                block_files.append(str(block_file))
            else:
                # Drop any partial file left behind by a failed extraction.
                block_file.unlink(missing_ok=True)
        if not block_files:
            continue

        concat_file = out_dir / f"temp_creative_{i+1}_concat.mp4"
        final_file = out_dir / f"cut_creative_{i+1}.mp4"
        try:
            if concatenate_videos(block_files, str(concat_file)):
                if ar_mode != "Original":
                    if apply_aspect_ratio(str(concat_file), str(final_file), ar_mode, face_tracking):
                        outputs.append(str(final_file))
                else:
                    # replace() also overwrites a clip left over from an earlier run.
                    concat_file.replace(final_file)
                    outputs.append(str(final_file))
        finally:
            # Intermediate files are removed whatever happened above.
            concat_file.unlink(missing_ok=True)
            for bf in block_files:
                Path(bf).unlink(missing_ok=True)
    return outputs
| # ======================= INTERFACE GRADIO ======================= | |
# Root directory for every generated cut; created as soon as the module loads.
SPACE_OUT = Path("outputs")
SPACE_OUT.mkdir(parents=True, exist_ok=True)
def do_transcribe(video_file, model_size):
    """UI callback: transcribe the selected video and build a status message.

    Returns:
        ``(segments, status_text)``. Without a video the segment list is empty
        and the status asks the user to select one. Otherwise the status shows
        the segment count and a preview of the first 12 segments.
    """
    if video_file is None:
        return [], "Selecione um vídeo."

    segments = transcribe(video_file, model_size=model_size)

    preview_lines = []
    for seg in segments[:12]:
        preview_lines.append(f"[{seg.start:.1f}–{seg.end:.1f}] {seg.text}")
    preview = "\n".join(preview_lines)

    status = f"Transcrição ok. Segmentos: {len(segments)}\n\nPrévia:\n{preview}"
    return segments, status
def run_linear(segs, video_file, out_subdir, min_len, max_len, ideal_len, k, gap, pad, ar_mode, face_tracking):
    """UI callback: generate linear cuts and report how many files were made.

    Returns:
        ``(file_paths, status_text)``. An empty list plus an explanatory
        message is returned when there is no transcript or no video.
    """
    if not segs:
        return [], "Transcreva antes de cortar."
    if video_file is None:
        # The video can be cleared in the UI after transcription.
        return [], "Selecione um vídeo."

    # out_subdir is free text from the UI: keep output inside SPACE_OUT even if
    # it is an absolute path or contains "..".
    workdir = SPACE_OUT / (out_subdir or "cortes")
    if SPACE_OUT.resolve() not in workdir.resolve().parents:
        workdir = SPACE_OUT / "cortes"

    outs = generate_linear_cuts(video_file, segs, str(workdir), min_len=min_len, max_len=max_len,
                                ideal_len=ideal_len, k=int(k or 0), gap_threshold=gap, pad=pad,
                                ar_mode=ar_mode, face_tracking=face_tracking)
    return [str(Path(p)) for p in outs], f"Gerados: {len(outs)} arquivo(s)."
def run_creative(segs, video_file, out_subdir, min_len, max_len, ideal_len, minb, maxb, k, gap, pad, ar_mode, face_tracking):
    """UI callback: generate creative cuts and report how many files were made.

    Returns:
        ``(file_paths, status_text)``. An empty list plus an explanatory
        message is returned when there is no transcript or no video.
    """
    if not segs:
        return [], "Transcreva antes de cortar."
    if video_file is None:
        # The video can be cleared in the UI after transcription.
        return [], "Selecione um vídeo."

    # out_subdir is free text from the UI: keep output inside SPACE_OUT even if
    # it is an absolute path or contains "..".
    workdir = SPACE_OUT / (out_subdir or "cortes")
    if SPACE_OUT.resolve() not in workdir.resolve().parents:
        workdir = SPACE_OUT / "cortes"

    outs = generate_creative_cuts(video_file, segs, str(workdir), min_len=min_len, max_len=max_len,
                                  ideal_len=ideal_len, min_blocks=int(minb or 0), max_blocks=int(maxb or 0),
                                  k=int(k or 0), gap_threshold=gap, pad=pad, ar_mode=ar_mode,
                                  face_tracking=face_tracking)
    return [str(Path(p)) for p in outs], f"Gerados: {len(outs)} arquivo(s)."
# Custom theme for the Gradio app (light background, neon-green primary buttons).
css = """
:root {
  --neon: #39FF14;
  --txt: #0a0a0a;
  --muted: #374151;
  --line: #e5e7eb;
  --bg: #ffffff;
}
html, body, .gradio-container { background: var(--bg) !important; color: var(--txt) !important; }
.gradio-container { font-family: 'Manrope', system-ui, sans-serif !important; }
.gradio-container h1 { font-weight: 800 !important; font-size: clamp(28px, 5vw, 46px) !important; }
.gradio-container button.primary {
  background: var(--neon) !important; color: #000 !important; border: none !important;
  border-radius: 10px !important; font-weight: 800 !important; padding: 12px 20px !important;
}
"""

# UI definition: left column = input video + transcription, right column = one
# tab per cut generator.
# NOTE(review): the container nesting below was reconstructed from component
# order - confirm it matches the intended layout.
with gr.Blocks(title="Editor de Cortes Automático", css=css) as demo:
    gr.HTML("""
    <link href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;800&display=swap" rel="stylesheet">
    <div style="text-align: center; padding: 24px 0;">
    <h1>🎬 Editor de Cortes Automático</h1>
    <p style="color: #6b7280;">Gere cortes com rastreamento facial inteligente</p>
    </div>
    """)
    with gr.Row():
        with gr.Column():
            video = gr.Video(label="Vídeo de entrada", interactive=True)
            with gr.Row():
                model_size = gr.Dropdown(["tiny","base","small","medium"], value="small", label="Modelo Whisper")
                out_subdir = gr.Textbox(label="Pasta de saída", value="cortes")
            transcribe_btn = gr.Button("🎙️ 1) Transcrever", variant="primary")
            transcript_preview = gr.Textbox(label="Status", lines=10)
        with gr.Column():
            with gr.Tab("✂️ Cortes Simples"):
                with gr.Row():
                    min_len = gr.Number(value=600, label="Min (s)")
                    max_len = gr.Number(value=900, label="Max (s)")
                with gr.Row():
                    ideal_len = gr.Number(value=900, label="Ideal (s)")
                    # precision=0 makes the component deliver an integer: the
                    # value ends up in range().
                    k = gr.Number(value=2, label="Quantidade", precision=0)
                with gr.Row():
                    gap = gr.Number(value=0.60, label="Gap")
                    pad = gr.Number(value=0.08, label="Pad")
                ar_mode = gr.Dropdown(["Original","Vertical 9:16","Quadrado 1:1","Retrato 4:5"],
                                      value="Original", label="Formato")
                face_tracking = gr.Checkbox(label="👤 Rastreamento facial", value=True)
                go_linear = gr.Button("🚀 2) Gerar Cortes", variant="primary")
                out_linear = gr.Files(label="Arquivos gerados")
                status_linear = gr.Textbox(label="Status", lines=2)
            with gr.Tab("🎨 Cortes Criativos"):
                with gr.Row():
                    # Block and clip counts feed random.randint()/range(), so
                    # they must be integers as well.
                    minb = gr.Number(value=3, label="Blocos min", precision=0)
                    maxb = gr.Number(value=8, label="Blocos max", precision=0)
                with gr.Row():
                    k2 = gr.Number(value=2, label="Quantidade", precision=0)
                    gap2 = gr.Number(value=0.60, label="Gap")
                    pad2 = gr.Number(value=0.08, label="Pad")
                ar_mode2 = gr.Dropdown(["Original","Vertical 9:16","Quadrado 1:1","Retrato 4:5"],
                                       value="Original", label="Formato")
                face_tracking2 = gr.Checkbox(label="👤 Rastreamento facial", value=True)
                go_creative = gr.Button("🎬 3) Gerar Criativos", variant="primary")
                out_creative = gr.Files(label="Arquivos gerados")
                status_creative = gr.Textbox(label="Status", lines=2)

    # Transcript segments are kept per session and shared by both generators.
    segs_state = gr.State([])

    transcribe_btn.click(
        do_transcribe,
        inputs=[video, model_size],
        outputs=[segs_state, transcript_preview]
    )
    go_linear.click(
        run_linear,
        inputs=[segs_state, video, out_subdir, min_len, max_len, ideal_len, k, gap, pad, ar_mode, face_tracking],
        outputs=[out_linear, status_linear]
    )
    # The creative tab reuses the length fields from the simple-cuts tab.
    go_creative.click(
        run_creative,
        inputs=[segs_state, video, out_subdir, min_len, max_len, ideal_len, minb, maxb, k2, gap2, pad2, ar_mode2, face_tracking2],
        outputs=[out_creative, status_creative]
    )

if __name__ == "__main__":
    demo.launch()