"""
V34
"""
import gradio as gr
import cv2
import numpy as np
import whisper
import subprocess
from pathlib import Path
from dataclasses import dataclass
from typing import List, Tuple, Optional
import tempfile
import os
import shutil
# ======================= DATACLASSES =======================
@dataclass
class Segment:
    """Represents a transcription segment with timestamps."""
start: float
end: float
text: str
def __repr__(self):
return f"Segment({self.start:.1f}-{self.end:.1f}: {self.text[:50]}...)"
@dataclass
class FaceBox:
    """Represents a single face detection."""
x: int
y: int
w: int
h: int
center_x: int
center_y: int
confidence: float = 1.0
# ======================= FACE TRACKING =======================
class FaceTracker:
    """Face tracker for smart cropping."""
def __init__(self):
cascade_paths = [
cv2.data.haarcascades + 'haarcascade_frontalface_default.xml',
cv2.data.haarcascades + 'haarcascade_frontalface_alt.xml',
]
self.face_cascade = None
for path in cascade_paths:
try:
self.face_cascade = cv2.CascadeClassifier(path)
if not self.face_cascade.empty():
break
            except Exception:
                continue
self.enabled = self.face_cascade is not None and not self.face_cascade.empty()
        if self.enabled:
            print("✅ Face detector loaded")
        else:
            print("⚠️ Face detector unavailable - falling back to centered crop")
def detect_faces(self, frame: np.ndarray) -> List[FaceBox]:
if not self.enabled:
return []
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = self.face_cascade.detectMultiScale(
gray, scaleFactor=1.1, minNeighbors=5,
minSize=(30, 30), flags=cv2.CASCADE_SCALE_IMAGE
)
face_boxes = []
for (x, y, w, h) in faces:
center_x = x + w // 2
center_y = y + h // 2
face_boxes.append(FaceBox(x, y, w, h, center_x, center_y))
return face_boxes
def get_primary_face(self, faces: List[FaceBox], frame_width: int, frame_height: int) -> Optional[FaceBox]:
if not faces:
return None
if len(faces) == 1:
return faces[0]
frame_center_x = frame_width / 2
frame_center_y = frame_height / 2
scored_faces = []
for face in faces:
size_score = (face.w * face.h) / (frame_width * frame_height)
dx = abs(face.center_x - frame_center_x) / frame_width
dy = abs(face.center_y - frame_center_y) / frame_height
center_score = 1 - (dx + dy) / 2
total_score = (size_score * 0.3) + (center_score * 0.7)
scored_faces.append((total_score, face))
scored_faces.sort(reverse=True, key=lambda x: x[0])
return scored_faces[0][1]
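    # Example with hypothetical numbers on a 1920x1080 frame: a 300x300 face at
    # the exact center scores ~0.043*0.3 + 1.0*0.7 ≈ 0.71, while a larger
    # 400x400 face at the left edge scores ~0.077*0.3 + 0.80*0.7 ≈ 0.58 -
    # centrality (weight 0.7) deliberately dominates raw size (weight 0.3).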
def calculate_smart_crop(self, frame: np.ndarray, target_width: int, target_height: int) -> Tuple[int, int, int, int]:
frame_h, frame_w = frame.shape[:2]
faces = self.detect_faces(frame)
primary_face = self.get_primary_face(faces, frame_w, frame_h)
target_ar = target_width / target_height
frame_ar = frame_w / frame_h
if primary_face:
face_center_x = primary_face.center_x
face_center_y = primary_face.center_y
if target_ar < frame_ar: # Vertical
crop_w = int(frame_h * target_ar)
crop_h = frame_h
crop_x = max(0, min(face_center_x - crop_w // 2, frame_w - crop_w))
crop_y = 0
            else:  # Horizontal/square
crop_w = frame_w
crop_h = int(frame_w / target_ar)
                # Shift the window up by 10% of its height so the face gets headroom
                offset = int(crop_h * 0.1)
crop_x = 0
crop_y = max(0, min(face_center_y - crop_h // 2 - offset, frame_h - crop_h))
else:
            # Centered fallback (no face detected)
if target_ar < frame_ar:
crop_w = int(frame_h * target_ar)
crop_h = frame_h
crop_x = (frame_w - crop_w) // 2
crop_y = 0
else:
crop_w = frame_w
crop_h = int(frame_w / target_ar)
crop_x = 0
crop_y = (frame_h - crop_h) // 2
return (crop_x, crop_y, crop_w, crop_h)
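# Minimal usage sketch for FaceTracker (assumes a local "sample.mp4"):
#
#   cap = cv2.VideoCapture("sample.mp4")
#   ok, frame = cap.read()
#   cap.release()
#   if ok:
#       x, y, w, h = FaceTracker().calculate_smart_crop(frame, 1080, 1920)
#       vertical_frame = frame[y:y+h, x:x+w]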
# ======================= TRANSCRIPTION =======================
def transcribe(video_file: str, model_size: str = "small") -> List[Segment]:
    print(f"🎙️ Loading Whisper model: {model_size}")
model = whisper.load_model(model_size)
    print(f"🎬 Transcribing: {video_file}")
result = model.transcribe(video_file, language="pt", verbose=False)
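    # result["segments"] is a list of dicts carrying at least "start", "end"
    # and "text" keys (Whisper's standard transcription output shape)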
segments = []
for seg in result["segments"]:
segments.append(Segment(
start=seg["start"],
end=seg["end"],
text=seg["text"].strip()
))
    print(f"✅ Transcription complete: {len(segments)} segments")
return segments
# ======================= VIDEO PROCESSING =======================
def extract_video_segment(input_video: str, output_video: str, start_time: float, end_time: float) -> bool:
duration = end_time - start_time
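    # Placing -ss before -i makes FFmpeg seek on the input (fast), and
    # re-encoding with libx264/aac keeps the cut points frame-accurate,
    # unlike a plain stream copy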
cmd = [
"ffmpeg", "-y", "-ss", str(start_time), "-i", input_video,
"-t", str(duration), "-c:v", "libx264", "-c:a", "aac",
"-strict", "experimental", output_video
]
try:
subprocess.run(cmd, check=True, capture_output=True)
return True
except subprocess.CalledProcessError as e:
        print(f"❌ Extraction error: {e}")
return False
def apply_smart_crop_to_video(input_path: str, output_path: str, target_width: int,
target_height: int, sample_frames: int = 10) -> bool:
tracker = FaceTracker()
cap = cv2.VideoCapture(input_path)
if not cap.isOpened():
        print(f"❌ Could not open: {input_path}")
return False
    # Keep fps as a float: int() would turn 29.97 into 29 and slowly desync the output
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    # Sample a few frames to smooth the crop position
sample_positions = []
frame_indices = np.linspace(0, frame_count - 1, min(sample_frames, frame_count), dtype=int)
for idx in frame_indices:
cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
ret, frame = cap.read()
if ret:
crop_coords = tracker.calculate_smart_crop(frame, target_width, target_height)
sample_positions.append(crop_coords)
    # Median position (robust to outlier detections)
if sample_positions:
avg_x = int(np.median([p[0] for p in sample_positions]))
avg_y = int(np.median([p[1] for p in sample_positions]))
crop_w = sample_positions[0][2]
crop_h = sample_positions[0][3]
final_crop = (avg_x, avg_y, crop_w, crop_h)
else:
        # Centered fallback when no frame could be sampled
target_ar = target_width / target_height
frame_ar = frame_w / frame_h
if target_ar < frame_ar:
crop_w = int(frame_h * target_ar)
crop_h = frame_h
final_crop = ((frame_w - crop_w) // 2, 0, crop_w, crop_h)
else:
crop_w = frame_w
crop_h = int(frame_w / target_ar)
final_crop = (0, (frame_h - crop_h) // 2, crop_w, crop_h)
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
    # Output writer (mp4v is portable but video-only; audio is muxed back below)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (target_width, target_height))
if not out.isOpened():
        print(f"❌ Could not create output: {output_path}")
cap.release()
return False
    print(f"🎬 Processing with crop: {final_crop}")
frame_num = 0
while True:
ret, frame = cap.read()
if not ret:
break
x, y, w, h = final_crop
cropped = frame[y:y+h, x:x+w]
resized = cv2.resize(cropped, (target_width, target_height), interpolation=cv2.INTER_LANCZOS4)
out.write(resized)
frame_num += 1
if frame_num % 30 == 0:
progress = (frame_num / frame_count) * 100
print(f" {progress:.1f}% ({frame_num}/{frame_count})")
    cap.release()
    out.release()
    # cv2.VideoWriter wrote a video-only file; mux the original audio back in.
    # The trailing "?" in -map makes the audio stream optional, so inputs
    # without audio still succeed.
    muxed_path = output_path + ".mux.mp4"
    mux_cmd = [
        "ffmpeg", "-y", "-i", output_path, "-i", input_path,
        "-map", "0:v:0", "-map", "1:a:0?", "-c:v", "copy", "-c:a", "aac",
        muxed_path,
    ]
    try:
        subprocess.run(mux_cmd, check=True, capture_output=True)
        os.replace(muxed_path, output_path)
    except subprocess.CalledProcessError:
        Path(muxed_path).unlink(missing_ok=True)
        print("⚠️ Audio mux failed - keeping video-only output")
    print(f"✅ Done: {output_path}")
    return True
def apply_aspect_ratio(input_video: str, output_video: str, ar_mode: str, face_tracking: bool = False) -> bool:
if ar_mode == "Original":
shutil.copy(input_video, output_video)
return True
    ar_dims = {
        "Vertical 9:16": (1080, 1920),
        "Square 1:1": (1080, 1080),
        "Portrait 4:5": (1080, 1350),
    }
if ar_mode not in ar_dims:
return False
width, height = ar_dims[ar_mode]
if face_tracking:
return apply_smart_crop_to_video(input_video, output_video, width, height)
else:
        # Plain centered crop/scale via FFmpeg
cmd = [
"ffmpeg", "-y", "-i", input_video,
"-vf", f"scale={width}:{height}:force_original_aspect_ratio=increase,crop={width}:{height}",
"-c:a", "copy", output_video
]
try:
subprocess.run(cmd, check=True, capture_output=True)
return True
except subprocess.CalledProcessError:
return False
def concatenate_videos(video_files: List[str], output_file: str) -> bool:
if not video_files:
return False
with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
list_file = f.name
for vf in video_files:
f.write(f"file '{os.path.abspath(vf)}'\n")
try:
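        # The concat demuxer with -c copy only works when all inputs share codec
        # parameters; ours do, since every block is re-encoded with the same
        # libx264/aac settings in extract_video_segment()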
cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", list_file, "-c", "copy", output_file]
subprocess.run(cmd, check=True, capture_output=True)
return True
except subprocess.CalledProcessError:
return False
finally:
Path(list_file).unlink(missing_ok=True)
# ======================= CREATIVE SUBTITLES =======================
def highlight_keywords(text: str) -> List[Tuple[str, bool]]:
    """
    Flags keywords that deserve visual emphasis.
    Returns a list of (word, is_highlighted) tuples.
    """
    # The keywords stay in Portuguese on purpose: they are matched against the
    # pt-BR transcript text.
    keywords = [
        # Action/imperative
        "tem que", "precisa", "deve", "faça", "veja", "olha", "escuta",
        # Negation/contrast
        "não", "nunca", "jamais", "mas", "porém", "entretanto",
        # Impact
        "problema", "solução", "segredo", "verdade", "realidade",
        # Numbers
        "milhão", "mil", "bilhão", "100%", "zero",
        # Emotion
        "incrível", "impossível", "fácil", "difícil", "importante",
        # Mental action
        "imagina", "pensa", "considera", "decide", "escolhe"
    ]
words = text.split()
result = []
for word in words:
word_lower = word.lower().strip(".,!?")
is_key = any(k in word_lower for k in keywords)
result.append((word, is_key))
return result
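# Example: highlight_keywords("Isso não é fácil") returns
# [("Isso", False), ("não", True), ("é", False), ("fácil", True)].
# Matching is by substring, so a keyword hidden inside a longer word (e.g.
# "mil" in "milagre") also triggers a highlight - a known looseness.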
def create_subtitle_clip(text: str, start: float, end: float,
video_width: int, video_height: int,
                         style: str = "hormozi") -> str:
    """
    Builds an ASS (Advanced SubStation Alpha) file with styled subtitles.
    Returns the path to the .ass file.

    Note: only the "hormozi" style is implemented (any other value would leave
    style_def unbound), and this helper is currently not called by the UI;
    add_subtitles_to_video writes its own complete ASS file instead.
    """
    if style == "hormozi":
        # Alex Hormozi-style captions
style_def = """[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Montserrat,72,&H00FFFF,&H00FFFF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,2,2,10,10,80,1
Style: Highlight,Montserrat,78,&H0000FFFF,&H0000FFFF,&H00000000,&H80000000,-1,0,0,0,110,110,0,0,1,4,3,2,10,10,80,1"""
    # Tag each word with its highlight flag
words_with_highlight = highlight_keywords(text)
    # Wrap into lines (max 40 characters per line)
lines = []
current_line = []
current_length = 0
for word, is_highlight in words_with_highlight:
word_len = len(word) + 1
if current_length + word_len > 40 and current_line:
lines.append(current_line)
current_line = [(word, is_highlight)]
current_length = word_len
else:
current_line.append((word, is_highlight))
current_length += word_len
if current_line:
lines.append(current_line)
    # Cap at 2 lines
if len(lines) > 2:
lines = lines[:2]
    # Emit the formatted text
formatted_lines = []
for line in lines:
line_text = []
for word, is_highlight in line:
if is_highlight:
                # Highlight: larger, vivid yellow, upper case
line_text.append(f"{{\\1c&H00FFFF&\\fs78\\b1}}{word.upper()}{{\\r}}")
else:
line_text.append(word)
formatted_lines.append(" ".join(line_text))
    final_text = "\\N".join(formatted_lines)  # \N is the ASS line break
    # Write the ASS file
fd, ass_path = tempfile.mkstemp(suffix=".ass")
with os.fdopen(fd, 'w', encoding='utf-8') as f:
f.write("""[Script Info]
Title: Viral Subtitles
ScriptType: v4.00+
WrapStyle: 0
PlayResX: """ + str(video_width) + """
PlayResY: """ + str(video_height) + """
ScaledBorderAndShadow: yes
""")
f.write(style_def + "\n\n")
f.write("""[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
""")
        # Convert seconds to the ASS time format, e.g. 83.5 -> "0:01:23.50"
def format_time(seconds):
h = int(seconds // 3600)
m = int((seconds % 3600) // 60)
s = seconds % 60
return f"{h}:{m:02d}:{s:05.2f}"
start_time = format_time(start)
end_time = format_time(end)
f.write(f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{final_text}\n")
return ass_path
def add_subtitles_to_video(input_video: str, output_video: str,
                            segments: List[Segment], style: str = "hormozi") -> bool:
    """
    Burns styled subtitles into the video using FFmpeg's ASS filter.
    """
    # Read the video dimensions
cap = cv2.VideoCapture(input_video)
video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
    # Build a single ASS file covering every segment
fd, ass_path = tempfile.mkstemp(suffix=".ass")
with os.fdopen(fd, 'w', encoding='utf-8') as f:
# Header
f.write(f"""[Script Info]
Title: Viral Subtitles
ScriptType: v4.00+
WrapStyle: 0
PlayResX: {video_width}
PlayResY: {video_height}
ScaledBorderAndShadow: yes
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Montserrat,68,&H00FFFF00,&H00FFFF00,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,2,2,10,10,60,1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
""")
        # One Dialogue line per segment
for seg in segments:
words_with_highlight = highlight_keywords(seg.text)
            # Apply highlight overrides
formatted_words = []
for word, is_highlight in words_with_highlight:
if is_highlight:
formatted_words.append(f"{{\\1c&H0000FFFF&\\fs76\\b1}}{word.upper()}{{\\r}}")
else:
formatted_words.append(word)
text = " ".join(formatted_words)
            # Wrap into lines (max 40 visible chars; override tags are stripped before measuring)
words = text.split()
lines = []
current = []
length = 0
for w in words:
w_len = len(w.replace("{\\1c&H0000FFFF&\\fs76\\b1}", "").replace("{\\r}", "")) + 1
if length + w_len > 40 and current:
lines.append(" ".join(current))
current = [w]
length = w_len
else:
current.append(w)
length += w_len
if current:
lines.append(" ".join(current))
            final_text = "\\N".join(lines[:2])  # max 2 lines; \N is the ASS line break
            # ASS time format
def fmt_time(s):
h = int(s // 3600)
m = int((s % 3600) // 60)
sec = s % 60
return f"{h}:{m:02d}:{sec:05.2f}"
start_str = fmt_time(seg.start)
end_str = fmt_time(seg.end)
f.write(f"Dialogue: 0,{start_str},{end_str},Default,,0,0,0,,{final_text}\n")
    # Burn the subtitles with FFmpeg
    print(f"[subtitles] Applying {style} style...")
    # Escape the path for FFmpeg's filter parser (Windows/Linux)
    ass_path_escaped = ass_path.replace('\\', '/').replace(':', '\\:')
cmd = [
"ffmpeg", "-y",
"-i", input_video,
"-vf", f"ass={ass_path_escaped}",
"-c:v", "libx264",
"-preset", "medium",
"-crf", "18",
"-c:a", "copy",
"-movflags", "+faststart",
output_video
]
try:
subprocess.run(cmd, check=True, capture_output=True)
        print(f"[subtitles] ✅ Done: {output_video}")
return True
except subprocess.CalledProcessError as e:
        print(f"[subtitles] ❌ Error: {e}")
return False
finally:
try:
Path(ass_path).unlink(missing_ok=True)
        except OSError:
pass
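# Hypothetical usage, chaining transcription and subtitling ("input.mp4" is a
# placeholder):
#   segs = transcribe("input.mp4")
#   add_subtitles_to_video("input.mp4", "input_subtitled.mp4", segs)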
def score_segment_virality(seg: Segment, idx: int, total: int) -> float:
    """
    Scores a segment's viral potential.
    Inspired by OpsClip-style patterns.
    """
score = 0.0
text = seg.text.lower()
    # HOOKS (questions, provocations)
if any(w in text for w in ["?", "por que", "qual", "como", "você"]):
score += 15
    # IMPACT PHRASES
impact_phrases = [
"não dá", "problema", "esse é o", "imaginou", "é só",
"mas", "porém", "entretanto", "então", "olha",
"escuta", "presta atenção", "isso", "agora"
]
for phrase in impact_phrases:
if phrase in text:
score += 8
    # NEGATIONS AND CONTRASTS (they create tension)
if any(w in text for w in ["não", "nunca", "jamais", "sem"]):
score += 5
    # ACTION/IMPERATIVE (drives engagement)
if any(w in text for w in ["tem que", "precisa", "deve", "faça", "veja"]):
score += 7
    # NUMBERS AND DATA (signal authority)
if any(c.isdigit() for c in text):
score += 6
    # IDEAL DURATION (15-45 s is the viral sweet spot)
duration = seg.end - seg.start
if 15 <= duration <= 45:
score += 20
elif 10 <= duration <= 60:
score += 10
    # POSITION IN THE VIDEO (the middle carries more context)
position_ratio = idx / max(1, total)
    if 0.2 <= position_ratio <= 0.8:  # avoid the extremes
score += 10
    # COMPLETENESS (avoid clipped sentences)
if text.strip().endswith((".", "!", "?", "né", "tá")):
score += 8
return score
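# Worked example (hypothetical segment): a 20 s segment near the middle of the
# video reading "Por que você não consegue?" scores 15 (hook) + 5 ("não") +
# 20 (15-45 s duration) + 10 (central position) + 8 (ends with "?") = 58.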
def find_viral_moments(segments: List[Segment], k: int = 5) -> List[Tuple[int, int, float]]:
    """
    Finds the k best viral moments.
    Returns a list of (start_idx, end_idx, score) tuples.
    """
viral_windows = []
    # Sliding windows of several sizes
    window_sizes = [1, 2, 3, 4, 5]  # how many consecutive segments per window
for window_size in window_sizes:
for i in range(len(segments) - window_size + 1):
window_segments = segments[i:i+window_size]
            # Total duration of the window
            total_duration = window_segments[-1].end - window_segments[0].start
            # Skip windows that are too long or too short
            if total_duration < 10 or total_duration > 60:
                continue
            # Aggregate score of the window
            window_score = sum(score_segment_virality(seg, i+j, len(segments))
                               for j, seg in enumerate(window_segments))
            # Bonus for windows that form a complete narrative arc
            combined_text = " ".join(s.text for s in window_segments)
            if "?" in combined_text and any(w in combined_text.lower() for w in ["porque", "então", "mas", "porém"]):
                window_score += 15  # question + answer = a complete narrative
            viral_windows.append((i, i+window_size-1, window_score, total_duration))
    # Sort by score and drop overlapping windows
    viral_windows.sort(key=lambda x: x[2], reverse=True)
    selected = []
    used_indices = set()
    for start_idx, end_idx, score, duration in viral_windows:
        # Keep only windows that do not overlap an already selected one
if not any(idx in used_indices for idx in range(start_idx, end_idx + 1)):
selected.append((start_idx, end_idx, score))
used_indices.update(range(start_idx, end_idx + 1))
if len(selected) >= k:
break
return selected
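# Usage sketch (note: this scorer is not wired into the Gradio UI yet; "talk.mp4"
# is a placeholder):
#   segs = transcribe("talk.mp4")
#   for start_idx, end_idx, score in find_viral_moments(segs, k=3):
#       print(f"{segs[start_idx].start:.1f}s-{segs[end_idx].end:.1f}s score={score:.0f}")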
# ======================= CUT GENERATION =======================
def generate_linear_cuts(video_file: str, segments: List[Segment], output_dir: str,
min_len: float = 600, max_len: float = 900, ideal_len: float = 900,
k: int = 2, gap_threshold: float = 0.60, pad: float = 0.08,
ar_mode: str = "Original", face_tracking: bool = False,
add_subtitles: bool = False) -> List[str]:
if not segments:
return []
Path(output_dir).mkdir(parents=True, exist_ok=True)
total_duration = segments[-1].end - segments[0].start
target_duration = min(max_len, max(min_len, total_duration / k))
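    # e.g. a 2400 s talk with k=2 yields 2400/2 = 1200 s per cut, clamped to max_len = 900 s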
outputs = []
current_start = segments[0].start
for i in range(k):
target_end = current_start + target_duration
best_end = target_end
for seg in segments:
if abs(seg.end - target_end) < gap_threshold and seg.end > current_start:
best_end = seg.end
break
if best_end - current_start > max_len:
best_end = current_start + max_len
start_with_pad = max(0, current_start - pad)
end_with_pad = best_end + pad
temp_file = Path(output_dir) / f"temp_linear_{i+1}.mp4"
final_file = Path(output_dir) / f"cut_linear_{i+1}.mp4"
        print(f"✂️ Cut {i+1}/{k}: {start_with_pad:.1f}s - {end_with_pad:.1f}s")
if extract_video_segment(video_file, str(temp_file), start_with_pad, end_with_pad):
if ar_mode != "Original":
if apply_aspect_ratio(str(temp_file), str(final_file), ar_mode, face_tracking):
temp_file.unlink()
outputs.append(str(final_file))
else:
temp_file.rename(final_file)
outputs.append(str(final_file))
current_start = best_end + gap_threshold
if current_start >= segments[-1].end:
break
return outputs
def generate_creative_cuts(video_file: str, segments: List[Segment], output_dir: str,
min_len: float = 600, max_len: float = 900, ideal_len: float = 900,
min_blocks: int = 3, max_blocks: int = 8, k: int = 2,
gap_threshold: float = 0.60, pad: float = 0.08,
ar_mode: str = "Original", face_tracking: bool = False,
add_subtitles: bool = False) -> List[str]:
if not segments or len(segments) < min_blocks:
return []
Path(output_dir).mkdir(parents=True, exist_ok=True)
outputs = []
import random
for i in range(k):
num_blocks = random.randint(min_blocks, min(max_blocks, len(segments)))
step = max(1, len(segments) // num_blocks)
selected_indices = [j * step for j in range(num_blocks)]
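        # e.g. 20 segments and num_blocks=4 -> step=5 -> indices [0, 5, 10, 15]:
        # blocks are sampled evenly across the whole video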
selected_segments = [segments[idx] for idx in selected_indices if idx < len(segments)]
block_files = []
for j, seg in enumerate(selected_segments):
block_file = Path(output_dir) / f"temp_creative_{i+1}_block_{j+1}.mp4"
start = max(0, seg.start - pad)
end = seg.end + pad
if extract_video_segment(video_file, str(block_file), start, end):
block_files.append(str(block_file))
if not block_files:
continue
concat_file = Path(output_dir) / f"temp_creative_{i+1}_concat.mp4"
if concatenate_videos(block_files, str(concat_file)):
final_file = Path(output_dir) / f"cut_creative_{i+1}.mp4"
if ar_mode != "Original":
if apply_aspect_ratio(str(concat_file), str(final_file), ar_mode, face_tracking):
concat_file.unlink()
outputs.append(str(final_file))
else:
concat_file.rename(final_file)
outputs.append(str(final_file))
for bf in block_files:
Path(bf).unlink(missing_ok=True)
return outputs
# ======================= GRADIO INTERFACE =======================
SPACE_OUT = Path("outputs")
SPACE_OUT.mkdir(exist_ok=True, parents=True)
def do_transcribe(video_file, model_size):
if video_file is None:
        return [], "Select a video."
segs = transcribe(video_file, model_size=model_size)
    preview = "\n".join([f"[{s.start:.1f}-{s.end:.1f}] {s.text}" for s in segs[:12]])
    return segs, f"Transcription OK. Segments: {len(segs)}\n\nPreview:\n{preview}"
def run_linear(segs, video_file, out_subdir, min_len, max_len, ideal_len, k, gap, pad, ar_mode, face_tracking):
if not segs:
        return [], "Transcribe before cutting."
workdir = SPACE_OUT / (out_subdir or "cortes")
outs = generate_linear_cuts(video_file, segs, str(workdir), min_len=min_len, max_len=max_len,
ideal_len=ideal_len, k=k, gap_threshold=gap, pad=pad,
ar_mode=ar_mode, face_tracking=face_tracking)
    return [str(Path(p)) for p in outs], f"Generated: {len(outs)} file(s)."
def run_creative(segs, video_file, out_subdir, min_len, max_len, ideal_len, minb, maxb, k, gap, pad, ar_mode, face_tracking):
if not segs:
        return [], "Transcribe before cutting."
workdir = SPACE_OUT / (out_subdir or "cortes")
outs = generate_creative_cuts(video_file, segs, str(workdir), min_len=min_len, max_len=max_len,
ideal_len=ideal_len, min_blocks=minb, max_blocks=maxb,
k=k, gap_threshold=gap, pad=pad, ar_mode=ar_mode,
face_tracking=face_tracking)
    return [str(Path(p)) for p in outs], f"Generated: {len(outs)} file(s)."
css = """
:root {
--neon: #39FF14;
--txt: #0a0a0a;
--muted: #374151;
--line: #e5e7eb;
--bg: #ffffff;
}
html, body, .gradio-container { background: var(--bg) !important; color: var(--txt) !important; }
.gradio-container { font-family: 'Manrope', system-ui, sans-serif !important; }
.gradio-container h1 { font-weight: 800 !important; font-size: clamp(28px, 5vw, 46px) !important; }
.gradio-container button.primary {
background: var(--neon) !important; color: #000 !important; border: none !important;
border-radius: 10px !important; font-weight: 800 !important; padding: 12px 20px !important;
}
"""
with gr.Blocks(title="Automatic Cut Editor", css=css) as demo:
gr.HTML("""
<link href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;800&display=swap" rel="stylesheet">
<div style="text-align: center; padding: 24px 0;">
    <h1>🎬 Automatic Cut Editor</h1>
    <p style="color: #6b7280;">Generate cuts with smart face tracking</p>
</div>
""")
with gr.Row():
with gr.Column():
            video = gr.Video(label="Input video", interactive=True)
            with gr.Row():
                model_size = gr.Dropdown(["tiny","base","small","medium"], value="small", label="Whisper model")
                out_subdir = gr.Textbox(label="Output folder", value="cortes")
            transcribe_btn = gr.Button("🎙️ 1) Transcribe", variant="primary")
transcript_preview = gr.Textbox(label="Status", lines=10)
with gr.Column():
            with gr.Tab("✂️ Simple Cuts"):
with gr.Row():
min_len = gr.Number(value=600, label="Min (s)")
max_len = gr.Number(value=900, label="Max (s)")
with gr.Row():
ideal_len = gr.Number(value=900, label="Ideal (s)")
                    k = gr.Number(value=2, label="Count")
with gr.Row():
gap = gr.Number(value=0.60, label="Gap")
pad = gr.Number(value=0.08, label="Pad")
                ar_mode = gr.Dropdown(["Original","Vertical 9:16","Square 1:1","Portrait 4:5"],
                                      value="Original", label="Format")
                face_tracking = gr.Checkbox(label="👤 Face tracking", value=True)
                go_linear = gr.Button("🚀 2) Generate Cuts", variant="primary")
                out_linear = gr.Files(label="Generated files")
status_linear = gr.Textbox(label="Status", lines=2)
            with gr.Tab("🎨 Creative Cuts"):
with gr.Row():
                    minb = gr.Number(value=3, label="Min blocks")
                    maxb = gr.Number(value=8, label="Max blocks")
with gr.Row():
                    k2 = gr.Number(value=2, label="Count")
gap2 = gr.Number(value=0.60, label="Gap")
pad2 = gr.Number(value=0.08, label="Pad")
                ar_mode2 = gr.Dropdown(["Original","Vertical 9:16","Square 1:1","Portrait 4:5"],
                                       value="Original", label="Format")
                face_tracking2 = gr.Checkbox(label="👤 Face tracking", value=True)
                go_creative = gr.Button("🎬 3) Generate Creative Cuts", variant="primary")
                out_creative = gr.Files(label="Generated files")
status_creative = gr.Textbox(label="Status", lines=2)
segs_state = gr.State([])
transcribe_btn.click(
do_transcribe,
inputs=[video, model_size],
outputs=[segs_state, transcript_preview]
)
go_linear.click(
run_linear,
inputs=[segs_state, video, out_subdir, min_len, max_len, ideal_len, k, gap, pad, ar_mode, face_tracking],
outputs=[out_linear, status_linear]
)
go_creative.click(
run_creative,
inputs=[segs_state, video, out_subdir, min_len, max_len, ideal_len, minb, maxb, k2, gap2, pad2, ar_mode2, face_tracking2],
outputs=[out_creative, status_creative]
)
if __name__ == "__main__":
demo.launch()