Spaces:
Sleeping
Sleeping
Upload 24 files
Browse files- scripts/create_viral_segments.py +343 -301
- scripts/download_video.py +46 -10
- scripts/edit_video.py +105 -60
- scripts/one_face.py +30 -0
scripts/create_viral_segments.py
CHANGED
|
@@ -3,6 +3,7 @@ import os
|
|
| 3 |
import re
|
| 4 |
import sys
|
| 5 |
import time
|
|
|
|
| 6 |
|
| 7 |
# Tenta importar bibliotecas de IA opcionalmente
|
| 8 |
try:
|
|
@@ -11,6 +12,9 @@ try:
|
|
| 11 |
except ImportError:
|
| 12 |
HAS_GEMINI = False
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
except ImportError:
|
| 15 |
HAS_G4F = False
|
| 16 |
|
|
@@ -21,37 +25,108 @@ except ImportError:
|
|
| 21 |
HAS_LLAMA_CPP = False
|
| 22 |
|
| 23 |
def clean_json_response(response_text):
|
| 24 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
if not response_text:
|
| 26 |
return {"segments": []}
|
| 27 |
-
|
| 28 |
-
#
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
if match:
|
| 32 |
-
response_text = match.group(1)
|
| 33 |
-
else:
|
| 34 |
-
pattern_generic = r"```(.*?)```"
|
| 35 |
-
match_generic = re.search(pattern_generic, response_text, re.DOTALL)
|
| 36 |
-
if match_generic:
|
| 37 |
-
response_text = match_generic.group(1)
|
| 38 |
-
|
| 39 |
-
# Always attempt to extract from outermost curly braces,
|
| 40 |
-
# as some models chatter before/after the code block
|
| 41 |
-
start_idx = response_text.find("{")
|
| 42 |
-
end_idx = response_text.rfind("}")
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
|
| 50 |
def preprocess_transcript_for_ai(segments):
|
| 51 |
"""
|
| 52 |
Concatenates transcript segments into a single string with embedded time tags.
|
| 53 |
-
Tags are inserted at the beginning (0s) and roughly every 4 seconds thereafter.
|
| 54 |
-
Format: "Word word word (4s) word word..."
|
| 55 |
"""
|
| 56 |
if not segments:
|
| 57 |
return ""
|
|
@@ -68,10 +143,8 @@ def preprocess_transcript_for_ai(segments):
|
|
| 68 |
text = seg.get('text', '').strip()
|
| 69 |
end_time = seg.get('end', 0)
|
| 70 |
|
| 71 |
-
# Add text
|
| 72 |
full_text += text + " "
|
| 73 |
|
| 74 |
-
# Add tag if ~4 seconds passed since last tag
|
| 75 |
if end_time - last_tag_time >= 4:
|
| 76 |
full_text += f"({int(end_time)}s) "
|
| 77 |
last_tag_time = end_time
|
|
@@ -96,12 +169,11 @@ def call_gemini(prompt, api_key, model_name='gemini-2.5-flash-lite-preview-09-20
|
|
| 96 |
except Exception as e:
|
| 97 |
error_str = str(e)
|
| 98 |
if "429" in error_str or "Quota exceeded" in error_str:
|
| 99 |
-
wait_time = base_wait * (attempt + 1)
|
| 100 |
|
| 101 |
-
# Try to find specific wait time in error message
|
| 102 |
match = re.search(r"retry in (\d+(\.\d+)?)s", error_str)
|
| 103 |
if match:
|
| 104 |
-
wait_time = float(match.group(1)) + 5.0
|
| 105 |
|
| 106 |
print(f"[429] Quota Exceeded. Waiting {wait_time:.2f}s before retry {attempt+1}/{max_retries}...", flush=True)
|
| 107 |
time.sleep(wait_time)
|
|
@@ -117,25 +189,53 @@ def call_g4f(prompt, model_name="gpt-4o-mini"):
|
|
| 117 |
if not HAS_G4F:
|
| 118 |
raise ImportError("A biblioteca 'g4f' não está instalada. Instale com: pip install g4f")
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
-
|
|
|
|
| 135 |
input_tsv = os.path.join(project_folder, 'input.tsv')
|
| 136 |
input_srt = os.path.join(project_folder, 'input.srt')
|
| 137 |
|
| 138 |
-
# Parse Input into Segments first
|
| 139 |
transcript_segments = []
|
| 140 |
|
| 141 |
# Try to load TSV first (more reliable time)
|
|
@@ -162,8 +262,6 @@ def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode
|
|
| 162 |
if not transcript_segments and os.path.exists(input_srt):
|
| 163 |
with open(input_srt, 'r', encoding='utf-8') as f:
|
| 164 |
srt_content = f.read()
|
| 165 |
-
# Simple SRT Regex Parser
|
| 166 |
-
# Matches: index, time range, text
|
| 167 |
pattern = re.compile(r'(\d+)\n(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})\n((?:(?!\n\n).)*)', re.DOTALL)
|
| 168 |
matches = pattern.findall(srt_content)
|
| 169 |
|
|
@@ -179,11 +277,187 @@ def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode
|
|
| 179 |
|
| 180 |
if not transcript_segments:
|
| 181 |
raise ValueError("Could not parse transcript from TSV or SRT.")
|
|
|
|
|
|
|
| 182 |
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
content = formatted_content
|
| 188 |
|
| 189 |
# Load Config and Prompt
|
|
@@ -191,7 +465,6 @@ def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode
|
|
| 191 |
config_path = os.path.join(base_dir, 'api_config.json')
|
| 192 |
prompt_path = os.path.join(base_dir, 'prompt.txt')
|
| 193 |
|
| 194 |
-
# Default Config
|
| 195 |
config = {
|
| 196 |
"selected_api": "gemini",
|
| 197 |
"gemini": {
|
|
@@ -209,38 +482,31 @@ def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode
|
|
| 209 |
try:
|
| 210 |
with open(config_path, 'r', encoding='utf-8') as f:
|
| 211 |
loaded_config = json.load(f)
|
| 212 |
-
# Merge simples
|
| 213 |
if "gemini" in loaded_config: config["gemini"].update(loaded_config["gemini"])
|
| 214 |
if "g4f" in loaded_config: config["g4f"].update(loaded_config["g4f"])
|
| 215 |
if "selected_api" in loaded_config: config["selected_api"] = loaded_config["selected_api"]
|
| 216 |
except Exception as e:
|
| 217 |
-
print(f"Erro ao ler api_config.json: {e}
|
| 218 |
|
| 219 |
-
#
|
| 220 |
-
current_chunk_size = 15000
|
| 221 |
model_name = ""
|
| 222 |
|
| 223 |
if ai_mode == "gemini":
|
| 224 |
cfg_chunk = config["gemini"].get("chunk_size", 15000)
|
| 225 |
current_chunk_size = chunk_size_arg if chunk_size_arg and int(chunk_size_arg) > 0 else cfg_chunk
|
| 226 |
-
|
| 227 |
cfg_model = config["gemini"].get("model", "gemini-2.5-flash-lite-preview-09-2025")
|
| 228 |
model_name = model_name_arg if model_name_arg else cfg_model
|
| 229 |
-
|
| 230 |
-
if not api_key: # Se não veio por argumento, tenta do config
|
| 231 |
-
api_key = config["gemini"].get("api_key", "")
|
| 232 |
|
| 233 |
elif ai_mode == "g4f":
|
| 234 |
cfg_chunk = config["g4f"].get("chunk_size", 2000)
|
| 235 |
current_chunk_size = chunk_size_arg if chunk_size_arg and int(chunk_size_arg) > 0 else cfg_chunk
|
| 236 |
-
|
| 237 |
cfg_model = config["g4f"].get("model", "gpt-4o-mini")
|
| 238 |
model_name = model_name_arg if model_name_arg else cfg_model
|
| 239 |
|
| 240 |
elif ai_mode == "local":
|
| 241 |
-
# For local, chunk size default 3000 chars roughly matches 1024-2048 tokens depending on chars/token
|
| 242 |
current_chunk_size = chunk_size_arg if chunk_size_arg and int(chunk_size_arg) > 0 else 3000
|
| 243 |
-
# Model name is just the argument (filename)
|
| 244 |
model_name = model_name_arg if model_name_arg else ""
|
| 245 |
|
| 246 |
system_prompt_template = ""
|
|
@@ -248,12 +514,11 @@ def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode
|
|
| 248 |
with open(prompt_path, 'r', encoding='utf-8') as f:
|
| 249 |
system_prompt_template = f.read()
|
| 250 |
else:
|
| 251 |
-
# Fallback se arquivo nao existir
|
| 252 |
print("Aviso: prompt.txt não encontrado. Usando prompt interno.")
|
| 253 |
system_prompt_template = """You are a World-Class Viral Video Editor.
|
| 254 |
{context_instruction}
|
| 255 |
Analyze the transcript below with time tags (XXs). Find {amount} viral segments.
|
| 256 |
-
Constraints: {min_duration}
|
| 257 |
IMPORTANT: Output "Title", "Hook", and "Reasoning" in the SAME LANGUAGE as the transcript (e.g., if transcript is Portuguese, output Portuguese).
|
| 258 |
TRANSCRIPT:
|
| 259 |
{transcript_chunk}
|
|
@@ -276,11 +541,8 @@ OUTPUT JSON ONLY:
|
|
| 276 |
}
|
| 277 |
'''
|
| 278 |
|
| 279 |
-
#
|
| 280 |
-
# Split content into chunks with OVERLAP
|
| 281 |
chunk_size = int(current_chunk_size)
|
| 282 |
-
|
| 283 |
-
# Define overlap size (e.g. 10% of chunk size or min 1000 chars)
|
| 284 |
overlap_size = max(1000, int(chunk_size * 0.1))
|
| 285 |
|
| 286 |
chunks = []
|
|
@@ -291,27 +553,16 @@ OUTPUT JSON ONLY:
|
|
| 291 |
|
| 292 |
while start < content_len:
|
| 293 |
end = min(start + chunk_size, content_len)
|
| 294 |
-
|
| 295 |
-
# Align End to newline to avoid cutting sentences is useless here since we process raw text line.
|
| 296 |
-
# But our `formatted_content` has newlines from preprocess? Actually `preprocess_transcript_for_ai` concats with " ".
|
| 297 |
-
# So we look for space.
|
| 298 |
-
|
| 299 |
if end < content_len:
|
| 300 |
last_space = content.rfind(' ', start, end)
|
| 301 |
if last_space != -1 and last_space > start:
|
| 302 |
end = last_space
|
| 303 |
-
|
| 304 |
chunk_text = content[start:end]
|
| 305 |
-
if chunk_text.strip():
|
| 306 |
chunks.append(chunk_text)
|
| 307 |
-
|
| 308 |
if end >= content_len:
|
| 309 |
break
|
| 310 |
-
|
| 311 |
-
# Prepare start for next chunk (Backtrack by overlap)
|
| 312 |
next_start = max(start + 1, end - overlap_size)
|
| 313 |
-
|
| 314 |
-
# Align next_start to space
|
| 315 |
safe_space = content.rfind(' ', start, next_start)
|
| 316 |
if safe_space != -1:
|
| 317 |
start = safe_space + 1
|
|
@@ -329,7 +580,6 @@ OUTPUT JSON ONLY:
|
|
| 329 |
if len(chunks) > 1:
|
| 330 |
context_instruction = f"Part {i+1} of {len(chunks)}. "
|
| 331 |
|
| 332 |
-
# Preencher o template
|
| 333 |
try:
|
| 334 |
prompt = system_prompt_template.format(
|
| 335 |
context_instruction=context_instruction,
|
|
@@ -338,12 +588,9 @@ OUTPUT JSON ONLY:
|
|
| 338 |
max_duration=tempo_maximo,
|
| 339 |
transcript_chunk=chunk,
|
| 340 |
json_template=json_template,
|
| 341 |
-
amount=quantidade_de_virals
|
| 342 |
)
|
| 343 |
except KeyError as e:
|
| 344 |
-
# Fallback se o user bagunçou o txt e esqueceu chaves ou colocou chaves erradas
|
| 345 |
-
# Tenta um replace manual basico ou avisa erro, mas ideal é não quebrar.
|
| 346 |
-
# Vamos usar replace seguro
|
| 347 |
prompt = system_prompt_template
|
| 348 |
prompt = prompt.replace("{context_instruction}", context_instruction)
|
| 349 |
prompt = prompt.replace("{virality_instruction}", virality_instruction)
|
|
@@ -355,31 +602,26 @@ OUTPUT JSON ONLY:
|
|
| 355 |
|
| 356 |
output_texts.append(prompt)
|
| 357 |
|
| 358 |
-
# --- Save Full Prompt for Reference ---
|
| 359 |
try:
|
| 360 |
full_prompt_path = os.path.join(project_folder, "prompt_full.txt")
|
| 361 |
-
# Prepare full prompt using replace to be safe
|
| 362 |
full_prompt = system_prompt_template
|
| 363 |
full_prompt = full_prompt.replace("{context_instruction}", "Full Video Transcript Analysis")
|
| 364 |
full_prompt = full_prompt.replace("{virality_instruction}", virality_instruction)
|
| 365 |
full_prompt = full_prompt.replace("{min_duration}", str(tempo_minimo))
|
| 366 |
full_prompt = full_prompt.replace("{max_duration}", str(tempo_maximo))
|
| 367 |
-
full_prompt = full_prompt.replace("{transcript_chunk}", content)
|
| 368 |
full_prompt = full_prompt.replace("{json_template}", json_template)
|
| 369 |
full_prompt = full_prompt.replace("{amount}", str(quantidade_de_virals))
|
| 370 |
|
| 371 |
with open(full_prompt_path, "w", encoding="utf-8") as f:
|
| 372 |
f.write(full_prompt)
|
| 373 |
-
# print(f"[INFO] Full reference prompt saved to: {full_prompt_path}")
|
| 374 |
except Exception as e:
|
| 375 |
print(f"[WARN] Could not save prompt_full.txt: {e}")
|
| 376 |
-
# -------------------------------------
|
| 377 |
|
| 378 |
-
|
| 379 |
|
| 380 |
print(f"Processando {len(output_texts)} chunks usando modo: {ai_mode.upper()}")
|
| 381 |
|
| 382 |
-
# Initialize Local Model if needed (Once)
|
| 383 |
local_llm_instance = None
|
| 384 |
if ai_mode == "local":
|
| 385 |
if not HAS_LLAMA_CPP:
|
|
@@ -387,10 +629,9 @@ OUTPUT JSON ONLY:
|
|
| 387 |
return {"segments": []}
|
| 388 |
|
| 389 |
models_dir = os.path.join(base_dir, 'models')
|
| 390 |
-
# Check if model_name is full path or filename
|
| 391 |
model_path = os.path.join(models_dir, model_name)
|
| 392 |
if not os.path.exists(model_path):
|
| 393 |
-
if os.path.exists(model_name):
|
| 394 |
model_path = model_name
|
| 395 |
else:
|
| 396 |
print(f"Error: Model not found at {model_path}")
|
|
@@ -398,7 +639,6 @@ OUTPUT JSON ONLY:
|
|
| 398 |
|
| 399 |
print(f"[INFO] Loading Local Model: {os.path.basename(model_path)} (This may take a while)...")
|
| 400 |
try:
|
| 401 |
-
# Adjust n_gpu_layers=-1 for max GPU usage. n_ctx=8192 for long context.
|
| 402 |
local_llm_instance = Llama(
|
| 403 |
model_path=model_path,
|
| 404 |
n_gpu_layers=-1,
|
|
@@ -411,8 +651,6 @@ OUTPUT JSON ONLY:
|
|
| 411 |
|
| 412 |
for i, prompt in enumerate(output_texts):
|
| 413 |
response_text = ""
|
| 414 |
-
|
| 415 |
-
# Always save prompt to file (Manual, Gemini, or G4F)
|
| 416 |
manual_prompt_path = os.path.join(project_folder, f"prompt_part_{i+1}.txt")
|
| 417 |
try:
|
| 418 |
with open(manual_prompt_path, "w", encoding="utf-8") as f:
|
|
@@ -422,7 +660,6 @@ OUTPUT JSON ONLY:
|
|
| 422 |
|
| 423 |
if ai_mode == "manual":
|
| 424 |
print(f"\n[INFO] O prompt foi salvo em: {manual_prompt_path}")
|
| 425 |
-
|
| 426 |
print("\n" + "="*60)
|
| 427 |
print(f"CHUNK {i+1}/{len(output_texts)}")
|
| 428 |
print("="*60)
|
|
@@ -446,11 +683,10 @@ OUTPUT JSON ONLY:
|
|
| 446 |
print(f"Arquivo {response_json_path} não encontrado.")
|
| 447 |
else:
|
| 448 |
response_text = user_input
|
| 449 |
-
# Tenta ler mais linhas se parecer incompleto (bruteforce simples)
|
| 450 |
if response_text.strip().startswith("{") and not response_text.strip().endswith("}"):
|
| 451 |
print("Parece incompleto. Cole o resto e dê Enter (ou Ctrl+C para cancelar):")
|
| 452 |
try:
|
| 453 |
-
rest = sys.stdin.read()
|
| 454 |
response_text += rest
|
| 455 |
except:
|
| 456 |
pass
|
|
@@ -458,15 +694,12 @@ OUTPUT JSON ONLY:
|
|
| 458 |
elif ai_mode == "gemini":
|
| 459 |
print(f"Enviando chunk {i+1} para o Gemini (Model: {model_name})...")
|
| 460 |
response_text = call_gemini(prompt, api_key, model_name=model_name)
|
| 461 |
-
|
| 462 |
elif ai_mode == "g4f":
|
| 463 |
print(f"Enviando chunk {i+1} para o G4F (Model: {model_name})...")
|
| 464 |
response_text = call_g4f(prompt, model_name=model_name)
|
| 465 |
-
|
| 466 |
elif ai_mode == "local" and local_llm_instance:
|
| 467 |
print(f"Processing chunk {i+1} with Local LLM...")
|
| 468 |
try:
|
| 469 |
-
# Use chat completion for better formatting handling
|
| 470 |
output = local_llm_instance.create_chat_completion(
|
| 471 |
messages=[
|
| 472 |
{"role": "system", "content": "You are a helpful assistant that outputs only JSON."},
|
|
@@ -488,214 +721,23 @@ OUTPUT JSON ONLY:
|
|
| 488 |
print(f"[DEBUG] Raw response saved to: {raw_response_path}")
|
| 489 |
except Exception as e:
|
| 490 |
print(f"[WARN] Failed to save raw response: {e}")
|
| 491 |
-
# ----------------------------------------
|
| 492 |
|
| 493 |
# Processar resposta
|
| 494 |
try:
|
| 495 |
data = clean_json_response(response_text)
|
| 496 |
chunk_segments = data.get("segments", [])
|
| 497 |
print(f"Encontrados {len(chunk_segments)} segmentos neste chunk.")
|
| 498 |
-
|
| 499 |
except json.JSONDecodeError:
|
| 500 |
-
print(f"Erro: Resposta inválida
|
| 501 |
-
print(f"Conteúdo recebido (primeiros 100 chars): {response_text[:100]}...")
|
| 502 |
except Exception as e:
|
| 503 |
print(f"Erro desconhecido ao processar chunk: {e}")
|
| 504 |
|
| 505 |
-
#
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
# Helper to find text in segments
|
| 515 |
-
def find_timestamp_by_text(target_text, segments_list, start_search_idx=0, is_end=False):
|
| 516 |
-
# Normalize target
|
| 517 |
-
target_clean = "".join(target_text.lower().split())
|
| 518 |
-
if not target_clean: return None, start_search_idx
|
| 519 |
-
|
| 520 |
-
current_concat = ""
|
| 521 |
-
param_idx = -1
|
| 522 |
-
|
| 523 |
-
# Sliding window or simple linear scan?
|
| 524 |
-
# Linear scan matches sequences of words.
|
| 525 |
-
# We look for the FIRST occurrence of target_text in segments_list starting from start_search_idx
|
| 526 |
-
|
| 527 |
-
# Optimization: Create a long string of remaining segments and find index, then map back?
|
| 528 |
-
# Better: iterate segments.
|
| 529 |
-
|
| 530 |
-
for i in range(start_search_idx, len(segments_list)):
|
| 531 |
-
seg_text = segments_list[i]['text']
|
| 532 |
-
# We treat this simple: check if target is basically inside this segment or spanning a few.
|
| 533 |
-
# Since target is "5-10 words", it might span 2 segments.
|
| 534 |
-
|
| 535 |
-
# Simple approach: Check if target (normalized) is substring of
|
| 536 |
-
# (prev + current + next) normalized.
|
| 537 |
-
# This is complex.
|
| 538 |
-
|
| 539 |
-
# SIMPLER APPROACH:
|
| 540 |
-
# The AI returns 'start_time_ref' (e.g., "(12s)").
|
| 541 |
-
# We jump to that time in segments_list.
|
| 542 |
-
# Then we look for the text in that vicinity.
|
| 543 |
-
pass
|
| 544 |
-
|
| 545 |
-
return None, -1
|
| 546 |
-
|
| 547 |
-
# SIMPLIFIED MATCHING LOGIC
|
| 548 |
-
# 1. Use 'start_time_ref' to find approximate index.
|
| 549 |
-
# 2. Search locally for 'start_text'.
|
| 550 |
-
# 3. Search forward for 'end_text'.
|
| 551 |
-
|
| 552 |
-
print(f"[DEBUG] Matching {len(all_segments)} raw segments to timestamps...")
|
| 553 |
-
|
| 554 |
-
for seg in all_segments:
|
| 555 |
-
try:
|
| 556 |
-
# 1. Parse Reference Time
|
| 557 |
-
ref_time_str = seg.get('start_time_ref', '(0s)')
|
| 558 |
-
ref_time_val = 0
|
| 559 |
-
try:
|
| 560 |
-
ref_time_val = int(re.search(r'\d+', ref_time_str).group())
|
| 561 |
-
except:
|
| 562 |
-
ref_time_val = 0
|
| 563 |
-
|
| 564 |
-
# Find segment index closest to ref_time
|
| 565 |
-
start_idx = 0
|
| 566 |
-
min_diff = 999999
|
| 567 |
-
for i, s in enumerate(transcript_segments):
|
| 568 |
-
diff = abs(s['start'] - ref_time_val)
|
| 569 |
-
if diff < min_diff:
|
| 570 |
-
min_diff = diff
|
| 571 |
-
start_idx = i
|
| 572 |
-
if s['start'] > ref_time_val + 10: # Stop if we went too far
|
| 573 |
-
break
|
| 574 |
-
|
| 575 |
-
# Backtrack a bit in case Ref was slightly off or text started earlier
|
| 576 |
-
start_idx = max(0, start_idx - 5)
|
| 577 |
-
|
| 578 |
-
# 2. Find Exact Start Text
|
| 579 |
-
start_text_target = seg.get('start_text', '').lower().strip()
|
| 580 |
-
# Normalize: remove punctuation
|
| 581 |
-
start_text_target = re.sub(r'[^\w\s]', '', start_text_target)
|
| 582 |
-
|
| 583 |
-
final_start_time = -1
|
| 584 |
-
match_start_idx = -1
|
| 585 |
-
|
| 586 |
-
# Search window: forward 50 segments
|
| 587 |
-
search_limit = min(len(transcript_segments), start_idx + 50)
|
| 588 |
-
|
| 589 |
-
for i in range(start_idx, search_limit):
|
| 590 |
-
s_text = transcript_segments[i]['text'].lower()
|
| 591 |
-
s_text = re.sub(r'[^\w\s]', '', s_text)
|
| 592 |
-
|
| 593 |
-
# Check for partial match (start of sentence)
|
| 594 |
-
if start_text_target and (start_text_target in s_text or s_text in start_text_target):
|
| 595 |
-
final_start_time = transcript_segments[i]['start']
|
| 596 |
-
match_start_idx = i
|
| 597 |
-
break
|
| 598 |
-
|
| 599 |
-
# Fallback: use Ref Time if text match fails
|
| 600 |
-
if final_start_time == -1:
|
| 601 |
-
final_start_time = transcript_segments[start_idx]['start'] if start_idx < len(transcript_segments) else ref_time_val
|
| 602 |
-
match_start_idx = start_idx
|
| 603 |
-
|
| 604 |
-
# 3. Find End Text (starting from match_start_idx)
|
| 605 |
-
end_text_target = seg.get('end_text', '').lower().strip()
|
| 606 |
-
end_text_target = re.sub(r'[^\w\s]', '', end_text_target)
|
| 607 |
-
|
| 608 |
-
final_end_time = -1
|
| 609 |
-
|
| 610 |
-
if match_start_idx != -1:
|
| 611 |
-
# Search forward for end text, extended range
|
| 612 |
-
# Use a larger window but we will sanity check duration later
|
| 613 |
-
search_end_limit = min(len(transcript_segments), match_start_idx + 200)
|
| 614 |
-
|
| 615 |
-
for i in range(match_start_idx, search_end_limit):
|
| 616 |
-
s_text = transcript_segments[i]['text'].lower()
|
| 617 |
-
s_text = re.sub(r'[^\w\s]', '', s_text)
|
| 618 |
-
|
| 619 |
-
if end_text_target and (end_text_target in s_text or s_text in end_text_target):
|
| 620 |
-
final_end_time = transcript_segments[i]['end']
|
| 621 |
-
break
|
| 622 |
-
|
| 623 |
-
# Fallback End Time checking Duration
|
| 624 |
-
if final_end_time == -1:
|
| 625 |
-
final_end_time = final_start_time + tempo_minimo # safe default
|
| 626 |
-
|
| 627 |
-
# Calculate Duration
|
| 628 |
-
duration = final_end_time - final_start_time
|
| 629 |
-
|
| 630 |
-
# Validate Duration (Min)
|
| 631 |
-
if duration < 5:
|
| 632 |
-
duration = tempo_minimo
|
| 633 |
-
final_end_time = final_start_time + duration
|
| 634 |
-
|
| 635 |
-
# Validate Duration (Max)
|
| 636 |
-
# If AI selected start and end points that result in a huge segment, clamp it.
|
| 637 |
-
if duration > tempo_maximo:
|
| 638 |
-
print(f"[WARN] Segmento excede max duration ({duration:.2f}s > {tempo_maximo}s). Cortando para {tempo_maximo}s.")
|
| 639 |
-
final_end_time = final_start_time + tempo_maximo
|
| 640 |
-
duration = tempo_maximo
|
| 641 |
-
|
| 642 |
-
# Construct Final Segment
|
| 643 |
-
processed_segments.append({
|
| 644 |
-
"title": seg.get('title', 'Viral Segment'),
|
| 645 |
-
"start_time": final_start_time,
|
| 646 |
-
"end_time": final_end_time,
|
| 647 |
-
"hook": seg.get('title', ''), # Use title as hook text
|
| 648 |
-
"reasoning": seg.get('reasoning', ''),
|
| 649 |
-
"score": seg.get('score', 0),
|
| 650 |
-
"duration": duration
|
| 651 |
-
})
|
| 652 |
-
|
| 653 |
-
except Exception as e:
|
| 654 |
-
print(f"[WARN] Error processing segment {seg}: {e}")
|
| 655 |
-
continue
|
| 656 |
-
|
| 657 |
-
# Deduplication (Keep highest score)
|
| 658 |
-
unique_segments = []
|
| 659 |
-
# Sort by Score desc
|
| 660 |
-
processed_segments.sort(key=lambda x: int(x.get('score', 0)), reverse=True)
|
| 661 |
-
|
| 662 |
-
for candidate in processed_segments:
|
| 663 |
-
is_dup = False
|
| 664 |
-
for existing in unique_segments:
|
| 665 |
-
s1, e1 = candidate['start_time'], candidate['end_time']
|
| 666 |
-
s2, e2 = existing['start_time'], existing['end_time']
|
| 667 |
-
|
| 668 |
-
overlap_start = max(s1, s2)
|
| 669 |
-
overlap_end = min(e1, e2)
|
| 670 |
-
|
| 671 |
-
if overlap_end > overlap_start:
|
| 672 |
-
intersection = overlap_end - overlap_start
|
| 673 |
-
if intersection > 5: # more than 5 seconds overlap
|
| 674 |
-
is_dup = True
|
| 675 |
-
break
|
| 676 |
-
if not is_dup:
|
| 677 |
-
unique_segments.append(candidate)
|
| 678 |
-
|
| 679 |
-
all_segments = unique_segments
|
| 680 |
-
print(f"[DEBUG] Finished processing. {len(all_segments)} segments valid.")
|
| 681 |
-
# ---------------------------
|
| 682 |
-
|
| 683 |
-
# Limit to the requested number of segments
|
| 684 |
-
if quantidade_de_virals and len(all_segments) > quantidade_de_virals:
|
| 685 |
-
print(f"Filtrando os top {quantidade_de_virals} segmentos de {len(all_segments)} candidatos encontrados nos chunks.")
|
| 686 |
-
all_segments = all_segments[:quantidade_de_virals]
|
| 687 |
-
|
| 688 |
-
final_result = {"segments": all_segments}
|
| 689 |
-
|
| 690 |
-
# Validação básica de duração nos resultados (opcional, mas bom pra evitar erros no ffmpeg)
|
| 691 |
-
# Convertendo milliseconds pra int se necessário, garantindo sanidade
|
| 692 |
-
validated_segments = []
|
| 693 |
-
for seg in final_result['segments']:
|
| 694 |
-
# Garante start_time
|
| 695 |
-
if 'start_time' in seg:
|
| 696 |
-
# Deixa passar, cut_segments lida com int/str conversion
|
| 697 |
-
validated_segments.append(seg)
|
| 698 |
-
|
| 699 |
-
final_result['segments'] = validated_segments
|
| 700 |
-
|
| 701 |
-
return final_result
|
|
|
|
| 3 |
import re
|
| 4 |
import sys
|
| 5 |
import time
|
| 6 |
+
import ast
|
| 7 |
|
| 8 |
# Tenta importar bibliotecas de IA opcionalmente
|
| 9 |
try:
|
|
|
|
| 12 |
except ImportError:
|
| 13 |
HAS_GEMINI = False
|
| 14 |
|
| 15 |
+
try:
|
| 16 |
+
import g4f
|
| 17 |
+
HAS_G4F = True
|
| 18 |
except ImportError:
|
| 19 |
HAS_G4F = False
|
| 20 |
|
|
|
|
| 25 |
HAS_LLAMA_CPP = False
|
| 26 |
|
| 27 |
def clean_json_response(response_text):
|
| 28 |
+
"""
|
| 29 |
+
Limpa a resposta focando em encontrar o objeto JSON que contém a chave "segments".
|
| 30 |
+
Estratégia: Busca a palavra "segments", encontra o '{' anterior e usa raw_decode.
|
| 31 |
+
"""
|
| 32 |
+
if not isinstance(response_text, str):
|
| 33 |
+
response_text = str(response_text)
|
| 34 |
+
|
| 35 |
if not response_text:
|
| 36 |
return {"segments": []}
|
| 37 |
+
|
| 38 |
+
# 1. Limpeza preliminar
|
| 39 |
+
# Remove tags de pensamento (DeepSeek R1)
|
| 40 |
+
response_text = re.sub(r'<think>.*?</think>', '', response_text, flags=re.DOTALL)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
+
# Normaliza escapes excessivos (\n virando \\n) e aspas se parecer necessário
|
| 43 |
+
try:
|
| 44 |
+
if "\\n" in response_text or "\\\"" in response_text:
|
| 45 |
+
# Tenta um decode básico de escapes
|
| 46 |
+
response_text = response_text.replace("\\n", "\n").replace("\\\"", "\"").replace("\\'", "'")
|
| 47 |
+
except:
|
| 48 |
+
pass
|
| 49 |
+
|
| 50 |
+
# 2. Busca pela palavra-chave "segments"
|
| 51 |
+
# Procura índices de todas as ocorrências de 'segments'
|
| 52 |
+
matches = [m.start() for m in re.finditer(r'segments', response_text)]
|
| 53 |
|
| 54 |
+
if not matches:
|
| 55 |
+
# Se não achou segments, retorna vazio
|
| 56 |
+
return {"segments": []}
|
| 57 |
+
|
| 58 |
+
# Tenta extrair JSON válido a partir de cada ocorrência
|
| 59 |
+
for match_idx in matches:
|
| 60 |
+
# Procura o '{' mais próximo ANTES de "segments"
|
| 61 |
+
# Limita busca a 5000 chars para trás para performance
|
| 62 |
+
start_search = max(0, match_idx - 5000)
|
| 63 |
+
snippet_before = response_text[start_search:match_idx]
|
| 64 |
+
|
| 65 |
+
# Encontra o ÚLTIMO '{' no snippet
|
| 66 |
+
last_open_rel = snippet_before.rfind('{')
|
| 67 |
+
|
| 68 |
+
if last_open_rel != -1:
|
| 69 |
+
real_start = start_search + last_open_rel
|
| 70 |
+
candidate_text = response_text[real_start:]
|
| 71 |
+
|
| 72 |
+
# Tentativa A: json.raw_decode
|
| 73 |
+
try:
|
| 74 |
+
decoder = json.JSONDecoder()
|
| 75 |
+
obj, _ = decoder.raw_decode(candidate_text)
|
| 76 |
+
if 'segments' in obj and isinstance(obj['segments'], list):
|
| 77 |
+
return obj
|
| 78 |
+
except:
|
| 79 |
+
pass
|
| 80 |
+
|
| 81 |
+
# Tentativa B: ast.literal_eval
|
| 82 |
+
try:
|
| 83 |
+
balance = 0
|
| 84 |
+
in_string = False
|
| 85 |
+
escape = False
|
| 86 |
+
found_end = -1
|
| 87 |
+
|
| 88 |
+
for i, char in enumerate(candidate_text):
|
| 89 |
+
if escape:
|
| 90 |
+
escape = False
|
| 91 |
+
continue
|
| 92 |
+
if char == '\\':
|
| 93 |
+
escape = True
|
| 94 |
+
continue
|
| 95 |
+
if char == "'" or char == '"':
|
| 96 |
+
in_string = not in_string
|
| 97 |
+
continue
|
| 98 |
+
|
| 99 |
+
if not in_string:
|
| 100 |
+
if char == '{':
|
| 101 |
+
balance += 1
|
| 102 |
+
elif char == '}':
|
| 103 |
+
balance -= 1
|
| 104 |
+
if balance == 0:
|
| 105 |
+
found_end = i
|
| 106 |
+
break
|
| 107 |
+
|
| 108 |
+
if found_end != -1:
|
| 109 |
+
clean_cand = candidate_text[:found_end+1]
|
| 110 |
+
obj = ast.literal_eval(clean_cand)
|
| 111 |
+
if 'segments' in obj and isinstance(obj['segments'], list):
|
| 112 |
+
return obj
|
| 113 |
+
except:
|
| 114 |
+
pass
|
| 115 |
+
|
| 116 |
+
# 3. Fallback: Extração bruta de markdown
|
| 117 |
+
try:
|
| 118 |
+
match = re.search(r"```json(.*?)```", response_text, re.DOTALL)
|
| 119 |
+
if match:
|
| 120 |
+
return json.loads(match.group(1))
|
| 121 |
+
except:
|
| 122 |
+
pass
|
| 123 |
+
|
| 124 |
+
return {"segments": []}
|
| 125 |
|
| 126 |
|
| 127 |
def preprocess_transcript_for_ai(segments):
|
| 128 |
"""
|
| 129 |
Concatenates transcript segments into a single string with embedded time tags.
|
|
|
|
|
|
|
| 130 |
"""
|
| 131 |
if not segments:
|
| 132 |
return ""
|
|
|
|
| 143 |
text = seg.get('text', '').strip()
|
| 144 |
end_time = seg.get('end', 0)
|
| 145 |
|
|
|
|
| 146 |
full_text += text + " "
|
| 147 |
|
|
|
|
| 148 |
if end_time - last_tag_time >= 4:
|
| 149 |
full_text += f"({int(end_time)}s) "
|
| 150 |
last_tag_time = end_time
|
|
|
|
| 169 |
except Exception as e:
|
| 170 |
error_str = str(e)
|
| 171 |
if "429" in error_str or "Quota exceeded" in error_str:
|
| 172 |
+
wait_time = base_wait * (attempt + 1)
|
| 173 |
|
|
|
|
| 174 |
match = re.search(r"retry in (\d+(\.\d+)?)s", error_str)
|
| 175 |
if match:
|
| 176 |
+
wait_time = float(match.group(1)) + 5.0
|
| 177 |
|
| 178 |
print(f"[429] Quota Exceeded. Waiting {wait_time:.2f}s before retry {attempt+1}/{max_retries}...", flush=True)
|
| 179 |
time.sleep(wait_time)
|
|
|
|
| 189 |
if not HAS_G4F:
|
| 190 |
raise ImportError("A biblioteca 'g4f' não está instalada. Instale com: pip install g4f")
|
| 191 |
|
| 192 |
+
max_retries = 3
|
| 193 |
+
base_wait = 5
|
| 194 |
+
|
| 195 |
+
for attempt in range(max_retries):
|
| 196 |
+
try:
|
| 197 |
+
response = g4f.ChatCompletion.create(
|
| 198 |
+
model=model_name,
|
| 199 |
+
messages=[{"role": "user", "content": prompt}],
|
| 200 |
+
)
|
| 201 |
+
|
| 202 |
+
if isinstance(response, dict):
|
| 203 |
+
if 'error' in response:
|
| 204 |
+
raise Exception(f"API Error: {response['error']}")
|
| 205 |
+
if 'choices' in response and isinstance(response['choices'], list):
|
| 206 |
+
if len(response['choices']) > 0:
|
| 207 |
+
content = response['choices'][0].get('message', {}).get('content', '')
|
| 208 |
+
if content:
|
| 209 |
+
return content
|
| 210 |
+
if not response:
|
| 211 |
+
raise ValueError("Empty Dict response")
|
| 212 |
+
|
| 213 |
+
return json.dumps(response)
|
| 214 |
+
|
| 215 |
+
if not response:
|
| 216 |
+
print(f"[WARN] G4F retornou resposta vazia. Tentativa {attempt+1}/{max_retries}")
|
| 217 |
+
time.sleep(base_wait)
|
| 218 |
+
continue
|
| 219 |
+
|
| 220 |
+
try:
|
| 221 |
+
return json.dumps(response, ensure_ascii=False)
|
| 222 |
+
except:
|
| 223 |
+
return str(response)
|
| 224 |
+
|
| 225 |
+
except Exception as e:
|
| 226 |
+
print(f"[WARN] Erro na API do G4F (Tentativa {attempt+1}/{max_retries}): {e}")
|
| 227 |
+
if attempt < max_retries - 1:
|
| 228 |
+
wait_time = base_wait * (attempt + 1)
|
| 229 |
+
time.sleep(wait_time)
|
| 230 |
+
|
| 231 |
+
print(f"Falha crítica após {max_retries} tentativas no G4F.")
|
| 232 |
+
return "{}"
|
| 233 |
|
| 234 |
+
def load_transcript(project_folder):
|
| 235 |
+
"""Parses input.tsv or input.srt from the project folder."""
|
| 236 |
input_tsv = os.path.join(project_folder, 'input.tsv')
|
| 237 |
input_srt = os.path.join(project_folder, 'input.srt')
|
| 238 |
|
|
|
|
| 239 |
transcript_segments = []
|
| 240 |
|
| 241 |
# Try to load TSV first (more reliable time)
|
|
|
|
| 262 |
if not transcript_segments and os.path.exists(input_srt):
|
| 263 |
with open(input_srt, 'r', encoding='utf-8') as f:
|
| 264 |
srt_content = f.read()
|
|
|
|
|
|
|
| 265 |
pattern = re.compile(r'(\d+)\n(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})\n((?:(?!\n\n).)*)', re.DOTALL)
|
| 266 |
matches = pattern.findall(srt_content)
|
| 267 |
|
|
|
|
| 277 |
|
| 278 |
if not transcript_segments:
|
| 279 |
raise ValueError("Could not parse transcript from TSV or SRT.")
|
| 280 |
+
|
| 281 |
+
return transcript_segments
|
| 282 |
|
| 283 |
+
def _score_as_int(segment):
    """Return the segment's score as an int, or 0 when it is missing or not numeric."""
    raw_score = segment.get('score', 0)
    try:
        return int(raw_score)
    except (TypeError, ValueError, OverflowError):
        pass
    try:
        # Models sometimes emit numeric strings such as "8.5", which int() rejects.
        return int(float(raw_score))
    except (TypeError, ValueError, OverflowError):
        return 0


def _parse_ref_time(ref):
    """Extract the first integer from a reference tag such as "(12s)"; 0 if there is none."""
    try:
        if isinstance(ref, str):
            found = re.search(r'\d+', ref)
            return int(found.group()) if found else 0
        return int(ref)
    except (TypeError, ValueError, OverflowError):
        return 0


def _texts_match(target, candidate):
    """Return True when one normalized text contains the other.

    An empty target or an empty/whitespace-only candidate never matches:
    the empty string is a substring of every string, so without this guard
    a blank transcript line would match any target.
    """
    if not target or not candidate.strip():
        return False
    return target in candidate or candidate in target


def process_segments(raw_segments, transcript_segments, min_duration, max_duration, output_count=None):
    """
    Align raw AI segments (with reference tags) to actual transcript timestamps.

    Each raw segment is a dict read with the keys 'start_time_ref',
    'start_text', 'end_text', 'title', 'hook', 'reasoning' and 'score'.
    Each transcript entry is read through its 'start', 'end' and 'text' keys.

    Steps:
      1. Sort candidates by score (descending); unparsable scores count as 0.
      2. Locate the transcript line closest to the reference time, back up
         five lines, then search forward for the start text and end text.
      3. Clamp the duration to [min_duration, max_duration].
      4. Drop any segment overlapping a higher-scored one by more than 5s.
      5. Keep at most `output_count` segments when it is truthy.

    The input list is not mutated. Entries that are not dicts, or that fail
    during alignment, are reported with a warning and skipped.

    Returns:
        dict: {"segments": [...]}, each item holding 'title', 'start_time',
        'end_time', 'hook', 'reasoning', 'score' and 'duration'.
    """
    tempo_minimo = min_duration
    tempo_maximo = max_duration

    candidates = []
    for seg in raw_segments or []:
        if isinstance(seg, dict):
            candidates.append(seg)
        else:
            print(f"[WARN] Error processing segment {seg}: not a JSON object")

    # sorted() keeps the caller's list untouched; the sort is stable, so ties
    # keep their original relative order.
    all_segments = sorted(candidates, key=_score_as_int, reverse=True)

    # --- POST-PROCESSING: Match Text to Timestamps ---
    processed_segments = []

    print(f"[DEBUG] Matching {len(all_segments)} raw segments to timestamps...")

    total_lines = len(transcript_segments)

    for seg in all_segments:
        try:
            # 1. Parse the reference time and find the closest transcript line.
            ref_time_val = _parse_ref_time(seg.get('start_time_ref', '(0s)'))

            start_idx = 0
            min_diff = float('inf')
            for i, s in enumerate(transcript_segments):
                diff = abs(s['start'] - ref_time_val)
                if diff < min_diff:
                    min_diff = diff
                    start_idx = i
                # The scan stops once lines start more than 10s past the reference.
                if s['start'] > ref_time_val + 10:
                    break

            # Backtrack a few lines in case the reference tag is slightly late.
            start_idx = max(0, start_idx - 5)

            # 2. Find the line containing the start text (punctuation-insensitive).
            start_text_target = str(seg.get('start_text') or '').lower().strip()
            start_text_target = re.sub(r'[^\w\s]', '', start_text_target)

            final_start_time = -1
            match_start_idx = -1

            search_limit = min(total_lines, start_idx + 50)
            for i in range(start_idx, search_limit):
                s_text = re.sub(r'[^\w\s]', '', str(transcript_segments[i]['text'] or '').lower())
                if _texts_match(start_text_target, s_text):
                    final_start_time = transcript_segments[i]['start']
                    match_start_idx = i
                    break

            # Fallback: use the backtracked line, or the raw reference time
            # when the transcript is empty.
            if final_start_time == -1:
                final_start_time = transcript_segments[start_idx]['start'] if start_idx < total_lines else ref_time_val
                match_start_idx = start_idx

            # 3. Find the line containing the end text, from the start line onward.
            end_text_target = str(seg.get('end_text') or '').lower().strip()
            end_text_target = re.sub(r'[^\w\s]', '', end_text_target)

            final_end_time = -1

            search_end_limit = min(total_lines, match_start_idx + 200)
            for i in range(match_start_idx, search_end_limit):
                s_text = re.sub(r'[^\w\s]', '', str(transcript_segments[i]['text'] or '').lower())
                if _texts_match(end_text_target, s_text):
                    final_end_time = transcript_segments[i]['end']
                    break

            # Fallback End Time
            if final_end_time == -1:
                final_end_time = final_start_time + tempo_minimo

            duration = final_end_time - final_start_time

            # Validate Duration (Min)
            if duration < tempo_minimo:
                print(f"[WARN] Segmento menor que duration min ({duration:.2f}s < {tempo_minimo}s). Estendendo para {tempo_minimo}s.")
                duration = tempo_minimo
                final_end_time = final_start_time + duration

            # Validate Duration (Max)
            if duration > tempo_maximo:
                print(f"[WARN] Segmento excede max duration ({duration:.2f}s > {tempo_maximo}s). Cortando para {tempo_maximo}s.")
                final_end_time = final_start_time + tempo_maximo
                duration = tempo_maximo

            processed_segments.append({
                "title": seg.get('title', 'Viral Segment'),
                "start_time": final_start_time,
                "end_time": final_end_time,
                # Prefer the model-provided hook; fall back to the title
                # (the previous behaviour) when it is absent.
                "hook": seg.get('hook') or seg.get('title', ''),
                "reasoning": seg.get('reasoning', ''),
                "score": seg.get('score', 0),
                "duration": duration
            })

        except Exception as e:
            # Best effort: one malformed segment must not abort the whole batch.
            print(f"[WARN] Error processing segment {seg}: {e}")
            continue

    # Deduplication: highest score first, so the better segment wins an overlap.
    unique_segments = []
    processed_segments.sort(key=_score_as_int, reverse=True)

    for candidate in processed_segments:
        is_dup = False
        s1, e1 = candidate['start_time'], candidate['end_time']
        for existing in unique_segments:
            s2, e2 = existing['start_time'], existing['end_time']

            intersection = min(e1, e2) - max(s1, s2)
            if intersection > 5:  # more than 5 seconds overlap
                is_dup = True
                print(f"[DEBUG] Dropping overlap: '{candidate.get('title')}' ({s1:.1f}-{e1:.1f}) overlaps with '{existing.get('title')}' ({s2:.1f}-{e2:.1f}) by {intersection:.1f}s")
                break
        if not is_dup:
            unique_segments.append(candidate)

    all_segments = unique_segments
    print(f"[DEBUG] Finished processing. {len(all_segments)} segments valid.")

    if output_count and len(all_segments) > output_count:
        print(f"Filtrando os top {output_count} segmentos de {len(all_segments)} candidatos encontrados nos chunks.")
        all_segments = all_segments[:output_count]

    # Every processed segment carries 'start_time', so no further filtering is needed.
    return {"segments": all_segments}
|
| 451 |
+
|
| 452 |
+
|
| 453 |
+
def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode="manual", api_key=None, project_folder="tmp", chunk_size_arg=None, model_name_arg=None):
|
| 454 |
+
quantidade_de_virals = num_segments
|
| 455 |
+
|
| 456 |
+
# 1. Load Transcript
|
| 457 |
+
transcript_segments = load_transcript(project_folder)
|
| 458 |
+
|
| 459 |
+
# 2. Pre-process Content
|
| 460 |
+
formatted_content = preprocess_transcript_for_ai(transcript_segments)
|
| 461 |
content = formatted_content
|
| 462 |
|
| 463 |
# Load Config and Prompt
|
|
|
|
| 465 |
config_path = os.path.join(base_dir, 'api_config.json')
|
| 466 |
prompt_path = os.path.join(base_dir, 'prompt.txt')
|
| 467 |
|
|
|
|
| 468 |
config = {
|
| 469 |
"selected_api": "gemini",
|
| 470 |
"gemini": {
|
|
|
|
| 482 |
try:
|
| 483 |
with open(config_path, 'r', encoding='utf-8') as f:
|
| 484 |
loaded_config = json.load(f)
|
|
|
|
| 485 |
if "gemini" in loaded_config: config["gemini"].update(loaded_config["gemini"])
|
| 486 |
if "g4f" in loaded_config: config["g4f"].update(loaded_config["g4f"])
|
| 487 |
if "selected_api" in loaded_config: config["selected_api"] = loaded_config["selected_api"]
|
| 488 |
except Exception as e:
|
| 489 |
+
print(f"Erro ao ler api_config.json: {e}")
|
| 490 |
|
| 491 |
+
# Config Vars
|
| 492 |
+
current_chunk_size = 15000
|
| 493 |
model_name = ""
|
| 494 |
|
| 495 |
if ai_mode == "gemini":
|
| 496 |
cfg_chunk = config["gemini"].get("chunk_size", 15000)
|
| 497 |
current_chunk_size = chunk_size_arg if chunk_size_arg and int(chunk_size_arg) > 0 else cfg_chunk
|
|
|
|
| 498 |
cfg_model = config["gemini"].get("model", "gemini-2.5-flash-lite-preview-09-2025")
|
| 499 |
model_name = model_name_arg if model_name_arg else cfg_model
|
| 500 |
+
if not api_key: api_key = config["gemini"].get("api_key", "")
|
|
|
|
|
|
|
| 501 |
|
| 502 |
elif ai_mode == "g4f":
|
| 503 |
cfg_chunk = config["g4f"].get("chunk_size", 2000)
|
| 504 |
current_chunk_size = chunk_size_arg if chunk_size_arg and int(chunk_size_arg) > 0 else cfg_chunk
|
|
|
|
| 505 |
cfg_model = config["g4f"].get("model", "gpt-4o-mini")
|
| 506 |
model_name = model_name_arg if model_name_arg else cfg_model
|
| 507 |
|
| 508 |
elif ai_mode == "local":
|
|
|
|
| 509 |
current_chunk_size = chunk_size_arg if chunk_size_arg and int(chunk_size_arg) > 0 else 3000
|
|
|
|
| 510 |
model_name = model_name_arg if model_name_arg else ""
|
| 511 |
|
| 512 |
system_prompt_template = ""
|
|
|
|
| 514 |
with open(prompt_path, 'r', encoding='utf-8') as f:
|
| 515 |
system_prompt_template = f.read()
|
| 516 |
else:
|
|
|
|
| 517 |
print("Aviso: prompt.txt não encontrado. Usando prompt interno.")
|
| 518 |
system_prompt_template = """You are a World-Class Viral Video Editor.
|
| 519 |
{context_instruction}
|
| 520 |
Analyze the transcript below with time tags (XXs). Find {amount} viral segments.
|
| 521 |
+
Constraints: Each segment MUST be between {min_duration} seconds and {max_duration} seconds.
|
| 522 |
IMPORTANT: Output "Title", "Hook", and "Reasoning" in the SAME LANGUAGE as the transcript (e.g., if transcript is Portuguese, output Portuguese).
|
| 523 |
TRANSCRIPT:
|
| 524 |
{transcript_chunk}
|
|
|
|
| 541 |
}
|
| 542 |
'''
|
| 543 |
|
| 544 |
+
# Chunking
|
|
|
|
| 545 |
chunk_size = int(current_chunk_size)
|
|
|
|
|
|
|
| 546 |
overlap_size = max(1000, int(chunk_size * 0.1))
|
| 547 |
|
| 548 |
chunks = []
|
|
|
|
| 553 |
|
| 554 |
while start < content_len:
|
| 555 |
end = min(start + chunk_size, content_len)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 556 |
if end < content_len:
|
| 557 |
last_space = content.rfind(' ', start, end)
|
| 558 |
if last_space != -1 and last_space > start:
|
| 559 |
end = last_space
|
|
|
|
| 560 |
chunk_text = content[start:end]
|
| 561 |
+
if chunk_text.strip():
|
| 562 |
chunks.append(chunk_text)
|
|
|
|
| 563 |
if end >= content_len:
|
| 564 |
break
|
|
|
|
|
|
|
| 565 |
next_start = max(start + 1, end - overlap_size)
|
|
|
|
|
|
|
| 566 |
safe_space = content.rfind(' ', start, next_start)
|
| 567 |
if safe_space != -1:
|
| 568 |
start = safe_space + 1
|
|
|
|
| 580 |
if len(chunks) > 1:
|
| 581 |
context_instruction = f"Part {i+1} of {len(chunks)}. "
|
| 582 |
|
|
|
|
| 583 |
try:
|
| 584 |
prompt = system_prompt_template.format(
|
| 585 |
context_instruction=context_instruction,
|
|
|
|
| 588 |
max_duration=tempo_maximo,
|
| 589 |
transcript_chunk=chunk,
|
| 590 |
json_template=json_template,
|
| 591 |
+
amount=quantidade_de_virals
|
| 592 |
)
|
| 593 |
except KeyError as e:
|
|
|
|
|
|
|
|
|
|
| 594 |
prompt = system_prompt_template
|
| 595 |
prompt = prompt.replace("{context_instruction}", context_instruction)
|
| 596 |
prompt = prompt.replace("{virality_instruction}", virality_instruction)
|
|
|
|
| 602 |
|
| 603 |
output_texts.append(prompt)
|
| 604 |
|
|
|
|
| 605 |
try:
|
| 606 |
full_prompt_path = os.path.join(project_folder, "prompt_full.txt")
|
|
|
|
| 607 |
full_prompt = system_prompt_template
|
| 608 |
full_prompt = full_prompt.replace("{context_instruction}", "Full Video Transcript Analysis")
|
| 609 |
full_prompt = full_prompt.replace("{virality_instruction}", virality_instruction)
|
| 610 |
full_prompt = full_prompt.replace("{min_duration}", str(tempo_minimo))
|
| 611 |
full_prompt = full_prompt.replace("{max_duration}", str(tempo_maximo))
|
| 612 |
+
full_prompt = full_prompt.replace("{transcript_chunk}", content)
|
| 613 |
full_prompt = full_prompt.replace("{json_template}", json_template)
|
| 614 |
full_prompt = full_prompt.replace("{amount}", str(quantidade_de_virals))
|
| 615 |
|
| 616 |
with open(full_prompt_path, "w", encoding="utf-8") as f:
|
| 617 |
f.write(full_prompt)
|
|
|
|
| 618 |
except Exception as e:
|
| 619 |
print(f"[WARN] Could not save prompt_full.txt: {e}")
|
|
|
|
| 620 |
|
| 621 |
+
all_raw_segments = []
|
| 622 |
|
| 623 |
print(f"Processando {len(output_texts)} chunks usando modo: {ai_mode.upper()}")
|
| 624 |
|
|
|
|
| 625 |
local_llm_instance = None
|
| 626 |
if ai_mode == "local":
|
| 627 |
if not HAS_LLAMA_CPP:
|
|
|
|
| 629 |
return {"segments": []}
|
| 630 |
|
| 631 |
models_dir = os.path.join(base_dir, 'models')
|
|
|
|
| 632 |
model_path = os.path.join(models_dir, model_name)
|
| 633 |
if not os.path.exists(model_path):
|
| 634 |
+
if os.path.exists(model_name):
|
| 635 |
model_path = model_name
|
| 636 |
else:
|
| 637 |
print(f"Error: Model not found at {model_path}")
|
|
|
|
| 639 |
|
| 640 |
print(f"[INFO] Loading Local Model: {os.path.basename(model_path)} (This may take a while)...")
|
| 641 |
try:
|
|
|
|
| 642 |
local_llm_instance = Llama(
|
| 643 |
model_path=model_path,
|
| 644 |
n_gpu_layers=-1,
|
|
|
|
| 651 |
|
| 652 |
for i, prompt in enumerate(output_texts):
|
| 653 |
response_text = ""
|
|
|
|
|
|
|
| 654 |
manual_prompt_path = os.path.join(project_folder, f"prompt_part_{i+1}.txt")
|
| 655 |
try:
|
| 656 |
with open(manual_prompt_path, "w", encoding="utf-8") as f:
|
|
|
|
| 660 |
|
| 661 |
if ai_mode == "manual":
|
| 662 |
print(f"\n[INFO] O prompt foi salvo em: {manual_prompt_path}")
|
|
|
|
| 663 |
print("\n" + "="*60)
|
| 664 |
print(f"CHUNK {i+1}/{len(output_texts)}")
|
| 665 |
print("="*60)
|
|
|
|
| 683 |
print(f"Arquivo {response_json_path} não encontrado.")
|
| 684 |
else:
|
| 685 |
response_text = user_input
|
|
|
|
| 686 |
if response_text.strip().startswith("{") and not response_text.strip().endswith("}"):
|
| 687 |
print("Parece incompleto. Cole o resto e dê Enter (ou Ctrl+C para cancelar):")
|
| 688 |
try:
|
| 689 |
+
rest = sys.stdin.read()
|
| 690 |
response_text += rest
|
| 691 |
except:
|
| 692 |
pass
|
|
|
|
| 694 |
elif ai_mode == "gemini":
|
| 695 |
print(f"Enviando chunk {i+1} para o Gemini (Model: {model_name})...")
|
| 696 |
response_text = call_gemini(prompt, api_key, model_name=model_name)
|
|
|
|
| 697 |
elif ai_mode == "g4f":
|
| 698 |
print(f"Enviando chunk {i+1} para o G4F (Model: {model_name})...")
|
| 699 |
response_text = call_g4f(prompt, model_name=model_name)
|
|
|
|
| 700 |
elif ai_mode == "local" and local_llm_instance:
|
| 701 |
print(f"Processing chunk {i+1} with Local LLM...")
|
| 702 |
try:
|
|
|
|
| 703 |
output = local_llm_instance.create_chat_completion(
|
| 704 |
messages=[
|
| 705 |
{"role": "system", "content": "You are a helpful assistant that outputs only JSON."},
|
|
|
|
| 721 |
print(f"[DEBUG] Raw response saved to: {raw_response_path}")
|
| 722 |
except Exception as e:
|
| 723 |
print(f"[WARN] Failed to save raw response: {e}")
|
|
|
|
| 724 |
|
| 725 |
# Processar resposta
|
| 726 |
try:
|
| 727 |
data = clean_json_response(response_text)
|
| 728 |
chunk_segments = data.get("segments", [])
|
| 729 |
print(f"Encontrados {len(chunk_segments)} segmentos neste chunk.")
|
| 730 |
+
all_raw_segments.extend(chunk_segments)
|
| 731 |
except json.JSONDecodeError:
|
| 732 |
+
print(f"Erro: Resposta inválida.")
|
|
|
|
| 733 |
except Exception as e:
|
| 734 |
print(f"Erro desconhecido ao processar chunk: {e}")
|
| 735 |
|
| 736 |
+
# Call the alignment / processing logic
|
| 737 |
+
return process_segments(
|
| 738 |
+
all_raw_segments,
|
| 739 |
+
transcript_segments,
|
| 740 |
+
tempo_minimo,
|
| 741 |
+
tempo_maximo,
|
| 742 |
+
output_count=quantidade_de_virals
|
| 743 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/download_video.py
CHANGED
|
@@ -6,8 +6,18 @@ from i18n.i18n import I18nAuto
|
|
| 6 |
i18n = I18nAuto()
|
| 7 |
|
| 8 |
def sanitize_filename(name):
|
| 9 |
-
"""Remove caracteres inválidos para
|
|
|
|
| 10 |
cleaned = re.sub(r'[\\/*?:"<>|]', "", name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
cleaned = cleaned.strip()
|
| 12 |
return cleaned
|
| 13 |
|
|
@@ -38,8 +48,11 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
|
|
| 38 |
info = ydl.extract_info(url, download=False)
|
| 39 |
title = info.get('title')
|
| 40 |
except Exception as e:
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
| 43 |
# Tentativa 2: Sem cookies
|
| 44 |
if not title:
|
| 45 |
try:
|
|
@@ -47,12 +60,20 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
|
|
| 47 |
info = ydl.extract_info(url, download=False)
|
| 48 |
title = info.get('title')
|
| 49 |
except Exception as e:
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
# Fallback final
|
| 53 |
if title:
|
| 54 |
safe_title = sanitize_filename(title)
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
else:
|
| 57 |
print(i18n("WARNING: Title could not be obtained. Using 'Unknown_Video'."))
|
| 58 |
safe_title = i18n("Unknown_Video")
|
|
@@ -69,7 +90,10 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
|
|
| 69 |
# Verificação inteligente
|
| 70 |
if os.path.exists(final_video_path):
|
| 71 |
if os.path.getsize(final_video_path) > 1024:
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
| 73 |
print(i18n("Skipping download and reusing local file."))
|
| 74 |
return final_video_path, project_folder
|
| 75 |
else:
|
|
@@ -127,7 +151,10 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
|
|
| 127 |
'format': 'srt',
|
| 128 |
}]
|
| 129 |
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
# Tentativa 1: Com configuração original
|
| 133 |
try:
|
|
@@ -178,7 +205,10 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
|
|
| 178 |
new_name = os.path.join(project_folder, "input.srt") # Vamos padronizar tudo para .srt
|
| 179 |
|
| 180 |
if ext.lower() == '.vtt':
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
| 182 |
try:
|
| 183 |
with open(best_sub, 'r', encoding='utf-8') as f:
|
| 184 |
lines = f.readlines()
|
|
@@ -252,7 +282,10 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
|
|
| 252 |
with open(new_name, 'w', encoding='utf-8') as f_out:
|
| 253 |
f_out.writelines(srt_content)
|
| 254 |
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
| 256 |
try: os.remove(best_sub)
|
| 257 |
except: pass
|
| 258 |
|
|
@@ -271,7 +304,10 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
|
|
| 271 |
try: os.remove(new_name)
|
| 272 |
except: pass
|
| 273 |
os.rename(best_sub, new_name)
|
| 274 |
-
|
|
|
|
|
|
|
|
|
|
| 275 |
|
| 276 |
# Limpa sobras
|
| 277 |
for extra in potential_subs[1:]:
|
|
|
|
| 6 |
i18n = I18nAuto()
|
| 7 |
|
| 8 |
def sanitize_filename(name):
    """Remove invalid characters and emojis to avoid encoding errors on Windows.

    Strips the reserved filesystem characters (\\ / * ? : " < > |), drops every
    character that cannot be represented in CP1252 (emojis, for instance,
    while accented letters such as á, ç, é are kept), and trims surrounding
    whitespace. Returns the cleaned string, which may be empty.
    """
    # Remove characters reserved by the filesystem.
    cleaned = re.sub(r'[\\/*?:"<>|]', "", name)

    # Drop emojis and anything else CP1252 cannot represent; 'ignore' discards
    # unencodable characters instead of raising.
    try:
        cleaned = cleaned.encode('cp1252', 'ignore').decode('cp1252')
    except LookupError:
        # CP1252 codec unavailable: fall back to plain ASCII (this also
        # removes accented letters).
        cleaned = cleaned.encode('ascii', 'ignore').decode('ascii')

    return cleaned.strip()
|
| 23 |
|
|
|
|
| 48 |
info = ydl.extract_info(url, download=False)
|
| 49 |
title = info.get('title')
|
| 50 |
except Exception as e:
|
| 51 |
+
try:
|
| 52 |
+
print(i18n("Warning: Failed to extract info with cookies: {}").format(e))
|
| 53 |
+
except UnicodeEncodeError:
|
| 54 |
+
print(i18n("Warning: Failed to extract info with cookies: [Encoding Error in Message]"))
|
| 55 |
+
|
| 56 |
# Tentativa 2: Sem cookies
|
| 57 |
if not title:
|
| 58 |
try:
|
|
|
|
| 60 |
info = ydl.extract_info(url, download=False)
|
| 61 |
title = info.get('title')
|
| 62 |
except Exception as e:
|
| 63 |
+
try:
|
| 64 |
+
print(i18n("Error getting video info (without cookies): {}").format(e))
|
| 65 |
+
except UnicodeEncodeError:
|
| 66 |
+
print(i18n("Error getting video info (without cookies): [Encoding Error in Message]"))
|
| 67 |
|
| 68 |
# Fallback final
|
| 69 |
if title:
|
| 70 |
safe_title = sanitize_filename(title)
|
| 71 |
+
try:
|
| 72 |
+
print(i18n("Detected title: {}").format(title))
|
| 73 |
+
except UnicodeEncodeError:
|
| 74 |
+
# Fallback for Windows consoles that choke on Emojis
|
| 75 |
+
clean_title = title.encode('ascii', 'replace').decode('ascii')
|
| 76 |
+
print(i18n("Detected title: {}").format(clean_title))
|
| 77 |
else:
|
| 78 |
print(i18n("WARNING: Title could not be obtained. Using 'Unknown_Video'."))
|
| 79 |
safe_title = i18n("Unknown_Video")
|
|
|
|
| 90 |
# Verificação inteligente
|
| 91 |
if os.path.exists(final_video_path):
|
| 92 |
if os.path.getsize(final_video_path) > 1024:
|
| 93 |
+
try:
|
| 94 |
+
print(i18n("Video already exists at: {}").format(final_video_path))
|
| 95 |
+
except UnicodeEncodeError:
|
| 96 |
+
print(i18n("Video already exists at: {}").format(final_video_path.encode('ascii', 'replace').decode('ascii')))
|
| 97 |
print(i18n("Skipping download and reusing local file."))
|
| 98 |
return final_video_path, project_folder
|
| 99 |
else:
|
|
|
|
| 151 |
'format': 'srt',
|
| 152 |
}]
|
| 153 |
|
| 154 |
+
try:
|
| 155 |
+
print(i18n("Downloading video to: {}...").format(project_folder))
|
| 156 |
+
except UnicodeEncodeError:
|
| 157 |
+
print(i18n("Downloading video to: {}...").format(project_folder.encode('ascii', 'replace').decode('ascii')))
|
| 158 |
|
| 159 |
# Tentativa 1: Com configuração original
|
| 160 |
try:
|
|
|
|
| 205 |
new_name = os.path.join(project_folder, "input.srt") # Vamos padronizar tudo para .srt
|
| 206 |
|
| 207 |
if ext.lower() == '.vtt':
|
| 208 |
+
try:
|
| 209 |
+
print(i18n("Formatting complex VTT subtitle ({}) to clean SRT...").format(os.path.basename(best_sub)))
|
| 210 |
+
except UnicodeEncodeError:
|
| 211 |
+
print(i18n("Formatting complex VTT subtitle ({}) to clean SRT...").format(os.path.basename(best_sub).encode('ascii', 'replace').decode('ascii')))
|
| 212 |
try:
|
| 213 |
with open(best_sub, 'r', encoding='utf-8') as f:
|
| 214 |
lines = f.readlines()
|
|
|
|
| 282 |
with open(new_name, 'w', encoding='utf-8') as f_out:
|
| 283 |
f_out.writelines(srt_content)
|
| 284 |
|
| 285 |
+
try:
|
| 286 |
+
print(i18n("Subtitle converted and cleaned: {}").format(new_name))
|
| 287 |
+
except UnicodeEncodeError:
|
| 288 |
+
print(i18n("Subtitle converted and cleaned: {}").format(new_name.encode('ascii', 'replace').decode('ascii')))
|
| 289 |
try: os.remove(best_sub)
|
| 290 |
except: pass
|
| 291 |
|
|
|
|
| 304 |
try: os.remove(new_name)
|
| 305 |
except: pass
|
| 306 |
os.rename(best_sub, new_name)
|
| 307 |
+
try:
|
| 308 |
+
print(i18n("SRT subtitle renamed to: {}").format(new_name))
|
| 309 |
+
except UnicodeEncodeError:
|
| 310 |
+
print(i18n("SRT subtitle renamed to: {}").format(new_name.encode('ascii', 'replace').decode('ascii')))
|
| 311 |
|
| 312 |
# Limpa sobras
|
| 313 |
for extra in potential_subs[1:]:
|
scripts/edit_video.py
CHANGED
|
@@ -3,7 +3,7 @@ import numpy as np
|
|
| 3 |
import os
|
| 4 |
import subprocess
|
| 5 |
import mediapipe as mp
|
| 6 |
-
from scripts.one_face import crop_and_resize_single_face, resize_with_padding, detect_face_or_body
|
| 7 |
from scripts.two_face import crop_and_resize_two_faces, detect_face_or_body_two_faces
|
| 8 |
try:
|
| 9 |
from scripts.face_detection_insightface import init_insightface, detect_faces_insightface, crop_and_resize_insightface
|
|
@@ -12,6 +12,48 @@ except ImportError:
|
|
| 12 |
INSIGHTFACE_AVAILABLE = False
|
| 13 |
print("InsightFace not found or error importing. Install with: pip install insightface onnxruntime-gpu")
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def get_center_bbox(bbox):
|
| 16 |
# bbox: [x1, y1, x2, y2]
|
| 17 |
return ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
|
|
@@ -52,9 +94,9 @@ def sort_by_proximity(new_faces, old_faces, center_func):
|
|
| 52 |
|
| 53 |
return new_faces
|
| 54 |
|
| 55 |
-
def generate_short_fallback(input_file, output_file, index, project_folder, final_folder):
|
| 56 |
-
"""Fallback function: Center Crop if
|
| 57 |
-
print(f"Processing (
|
| 58 |
cap = cv2.VideoCapture(input_file)
|
| 59 |
if not cap.isOpened():
|
| 60 |
print(f"Error opening video: {input_file}")
|
|
@@ -65,9 +107,12 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
|
|
| 65 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 66 |
|
| 67 |
# Target dimensions (9:16)
|
|
|
|
| 68 |
target_width = 1080
|
| 69 |
target_height = 1920
|
| 70 |
|
|
|
|
|
|
|
| 71 |
# Use FFmpeg Pipe instead of cv2.VideoWriter to avoid OpenCV backend errors
|
| 72 |
ffmpeg_cmd = [
|
| 73 |
'ffmpeg', '-y', '-loglevel', 'error', '-hide_banner', '-stats',
|
|
@@ -77,12 +122,16 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
|
|
| 77 |
'-pix_fmt', 'bgr24',
|
| 78 |
'-r', str(fps),
|
| 79 |
'-i', '-',
|
| 80 |
-
'-c:v',
|
| 81 |
-
'-preset',
|
| 82 |
'-pix_fmt', 'yuv420p',
|
| 83 |
output_file
|
| 84 |
]
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
process = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)
|
| 87 |
|
| 88 |
while True:
|
|
@@ -90,38 +139,19 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
|
|
| 90 |
if not ret:
|
| 91 |
break
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
scale_factor = target_width / width
|
| 98 |
-
|
| 99 |
-
# Garante dimensoes inteiras
|
| 100 |
-
new_w = int(width * scale_factor)
|
| 101 |
-
new_h = int(height * scale_factor)
|
| 102 |
-
|
| 103 |
-
resized = cv2.resize(frame, (new_w, new_h))
|
| 104 |
-
|
| 105 |
-
# Crop center
|
| 106 |
-
res_h, res_w, _ = resized.shape
|
| 107 |
-
start_x = (res_w - target_width) // 2
|
| 108 |
-
start_y = (res_h - target_height) // 2
|
| 109 |
-
|
| 110 |
-
if start_x < 0: start_x = 0
|
| 111 |
-
if start_y < 0: start_y = 0
|
| 112 |
-
|
| 113 |
-
cropped = resized[start_y:start_y+target_height, start_x:start_x+target_width]
|
| 114 |
-
|
| 115 |
-
# Resize final por segurança e validação
|
| 116 |
-
if cropped.shape[1] != target_width or cropped.shape[0] != target_height:
|
| 117 |
-
cropped = cv2.resize(cropped, (target_width, target_height))
|
| 118 |
|
| 119 |
try:
|
| 120 |
# Write raw bytes to ffmpeg stdin
|
| 121 |
-
process.stdin.write(
|
| 122 |
except Exception as e:
|
| 123 |
print(f"Error writing frame to ffmpeg pipe: {e}")
|
| 124 |
pass
|
|
|
|
|
|
|
| 125 |
|
| 126 |
cap.release()
|
| 127 |
process.stdin.close()
|
|
@@ -137,11 +167,12 @@ def finalize_video(input_file, output_file, index, fps, project_folder, final_fo
|
|
| 137 |
|
| 138 |
if os.path.exists(audio_file) and os.path.getsize(audio_file) > 0:
|
| 139 |
final_output = os.path.join(final_folder, f"final-output{str(index).zfill(3)}_processed.mp4")
|
|
|
|
| 140 |
command = [
|
| 141 |
"ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats",
|
| 142 |
"-i", output_file,
|
| 143 |
"-i", audio_file,
|
| 144 |
-
"-c:v",
|
| 145 |
"-c:a", "aac", "-b:a", "192k",
|
| 146 |
"-r", str(fps),
|
| 147 |
final_output
|
|
@@ -191,7 +222,7 @@ def calculate_mouth_ratio(landmarks):
|
|
| 191 |
|
| 192 |
return h / w
|
| 193 |
|
| 194 |
-
def generate_short_mediapipe(input_file, output_file, index, face_mode, project_folder, final_folder, face_detection, face_mesh, pose, detection_period=None):
|
| 195 |
try:
|
| 196 |
cap = cv2.VideoCapture(input_file)
|
| 197 |
if not cap.isOpened():
|
|
@@ -218,8 +249,8 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
|
|
| 218 |
|
| 219 |
last_detected_faces = None
|
| 220 |
last_frame_face_positions = None
|
| 221 |
-
|
| 222 |
-
max_frames_without_detection = int(
|
| 223 |
|
| 224 |
transition_duration = int(fps)
|
| 225 |
transition_frames = []
|
|
@@ -275,9 +306,9 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
|
|
| 275 |
else:
|
| 276 |
transition_frames = []
|
| 277 |
last_detected_faces = current_detections
|
| 278 |
-
|
| 279 |
else:
|
| 280 |
-
|
| 281 |
|
| 282 |
# Update next detection frame
|
| 283 |
step = 5
|
|
@@ -300,10 +331,13 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
|
|
| 300 |
if len(transition_frames) > 0:
|
| 301 |
current_faces = transition_frames[0]
|
| 302 |
transition_frames = transition_frames[1:]
|
| 303 |
-
elif last_detected_faces is not None and
|
| 304 |
current_faces = last_detected_faces
|
| 305 |
else:
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
| 307 |
coordinate_log.append({"frame": frame_index, "faces": []})
|
| 308 |
out.write(result)
|
| 309 |
continue
|
|
@@ -319,7 +353,10 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
|
|
| 319 |
f = current_faces[0]
|
| 320 |
result = crop_and_resize_single_face(frame, f)
|
| 321 |
else:
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
out.write(result)
|
| 325 |
|
|
@@ -332,7 +369,7 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
|
|
| 332 |
print(f"Error in MediaPipe processing: {e}")
|
| 333 |
raise e # Rethrow to trigger fallback
|
| 334 |
|
| 335 |
-
def generate_short_haar(input_file, output_file, index, project_folder, final_folder, detection_period=None):
|
| 336 |
"""Face detection using OpenCV Haar Cascades."""
|
| 337 |
print(f"Processing (Haar Cascade): {input_file}")
|
| 338 |
|
|
@@ -361,8 +398,8 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
|
|
| 361 |
detection_interval = max(1, int(detection_period * fps))
|
| 362 |
last_detected_faces = None
|
| 363 |
last_frame_face_positions = None
|
| 364 |
-
|
| 365 |
-
max_frames_without_detection = int(
|
| 366 |
|
| 367 |
transition_duration = int(fps) # 1 second smooth transition
|
| 368 |
transition_frames = []
|
|
@@ -399,18 +436,21 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
|
|
| 399 |
else:
|
| 400 |
transition_frames = []
|
| 401 |
last_detected_faces = detections
|
| 402 |
-
|
| 403 |
else:
|
| 404 |
-
|
| 405 |
|
| 406 |
if len(transition_frames) > 0:
|
| 407 |
current_faces = transition_frames[0]
|
| 408 |
transition_frames = transition_frames[1:]
|
| 409 |
-
elif last_detected_faces is not None and
|
| 410 |
current_faces = last_detected_faces
|
| 411 |
else:
|
| 412 |
# No face detected for a while -> Center/Padding fallback
|
| 413 |
-
|
|
|
|
|
|
|
|
|
|
| 414 |
out.write(result)
|
| 415 |
continue
|
| 416 |
|
|
@@ -434,7 +474,7 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
|
|
| 434 |
|
| 435 |
finalize_video(input_file, output_file, index, fps, project_folder, final_folder)
|
| 436 |
|
| 437 |
-
def generate_short_insightface(input_file, output_file, index, project_folder, final_folder, face_mode="auto", detection_period=None, filter_threshold=0.35, two_face_threshold=0.60, confidence_threshold=0.30, dead_zone=40, focus_active_speaker=False, active_speaker_mar=0.03, active_speaker_score_diff=1.5, include_motion=False, active_speaker_motion_deadzone=3.0, active_speaker_motion_sensitivity=0.05, active_speaker_decay=2.0):
|
| 438 |
"""Face detection using InsightFace (SOTA)."""
|
| 439 |
print(f"Processing (InsightFace): {input_file} | Mode: {face_mode}")
|
| 440 |
|
|
@@ -457,8 +497,8 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
|
|
| 457 |
|
| 458 |
last_detected_faces = None
|
| 459 |
last_frame_face_positions = None
|
| 460 |
-
|
| 461 |
-
max_frames_without_detection =
|
| 462 |
|
| 463 |
transition_duration = 4 # Smooth transition over 4 frames (almost continuous)
|
| 464 |
transition_frames = []
|
|
@@ -872,9 +912,10 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
|
|
| 872 |
# Reset transition if face count changed or first detect
|
| 873 |
transition_frames = []
|
| 874 |
last_detected_faces = detections
|
| 875 |
-
|
| 876 |
else:
|
| 877 |
-
|
|
|
|
| 878 |
|
| 879 |
# Update next detection frame based on NEW state
|
| 880 |
step = 5 # Default fallback (very fast)
|
|
@@ -899,11 +940,14 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
|
|
| 899 |
if len(transition_frames) > 0:
|
| 900 |
current_faces = transition_frames[0]
|
| 901 |
transition_frames = transition_frames[1:]
|
| 902 |
-
elif last_detected_faces is not None and
|
| 903 |
current_faces = last_detected_faces
|
| 904 |
else:
|
| 905 |
# Fallback for this frame
|
| 906 |
-
|
|
|
|
|
|
|
|
|
|
| 907 |
out.write(result)
|
| 908 |
continue
|
| 909 |
|
|
@@ -1017,7 +1061,7 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
|
|
| 1017 |
return "1"
|
| 1018 |
|
| 1019 |
|
| 1020 |
-
def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detection_period=None, filter_threshold=0.35, two_face_threshold=0.60, confidence_threshold=0.30, dead_zone=40, focus_active_speaker=False, active_speaker_mar=0.03, active_speaker_score_diff=1.5, include_motion=False, active_speaker_motion_deadzone=3.0, active_speaker_motion_sensitivity=0.05, active_speaker_decay=2.0, segments_data=None):
|
| 1021 |
# Lazy init solutions only when needed to avoid AttributeError if import failed partially
|
| 1022 |
mp_face_detection = None
|
| 1023 |
mp_face_mesh = None
|
|
@@ -1118,7 +1162,8 @@ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detec
|
|
| 1118 |
active_speaker_mar=active_speaker_mar, active_speaker_score_diff=active_speaker_score_diff, include_motion=include_motion,
|
| 1119 |
active_speaker_motion_deadzone=active_speaker_motion_deadzone,
|
| 1120 |
active_speaker_motion_sensitivity=active_speaker_motion_sensitivity,
|
| 1121 |
-
active_speaker_decay=active_speaker_decay
|
|
|
|
| 1122 |
if res: detected_mode = res
|
| 1123 |
success = True
|
| 1124 |
except Exception as e:
|
|
@@ -1134,7 +1179,7 @@ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detec
|
|
| 1134 |
mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=2, refine_landmarks=True, min_detection_confidence=0.2, min_tracking_confidence=0.2) as face_mesh, \
|
| 1135 |
mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
|
| 1136 |
|
| 1137 |
-
generate_short_mediapipe(input_file, output_file, index, face_mode, project_folder, final_folder, face_detection, face_mesh, pose, detection_period=detection_period)
|
| 1138 |
# We don't easily know detected mode here without return, assuming '1' or '2' based on last frame?
|
| 1139 |
# Ideally function should return as well.
|
| 1140 |
detected_mode = "1" # Placeholder, user didn't complain about stats.
|
|
@@ -1149,14 +1194,14 @@ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detec
|
|
| 1149 |
if not success and (use_haar or (not mediapipe_working and not insightface_working)):
|
| 1150 |
try:
|
| 1151 |
print("Attempts with Haar Cascade...")
|
| 1152 |
-
generate_short_haar(input_file, output_file, index, project_folder, final_folder, detection_period=detection_period)
|
| 1153 |
success = True
|
| 1154 |
except Exception as e2:
|
| 1155 |
print(f"Haar fallback also failed: {e2}")
|
| 1156 |
|
| 1157 |
# 4. Last Resort: Center Crop
|
| 1158 |
if not success:
|
| 1159 |
-
generate_short_fallback(input_file, output_file, index, project_folder, final_folder)
|
| 1160 |
detected_mode = "1"
|
| 1161 |
success = True
|
| 1162 |
|
|
|
|
| 3 |
import os
|
| 4 |
import subprocess
|
| 5 |
import mediapipe as mp
|
| 6 |
+
from scripts.one_face import crop_and_resize_single_face, resize_with_padding, detect_face_or_body, crop_center_zoom
|
| 7 |
from scripts.two_face import crop_and_resize_two_faces, detect_face_or_body_two_faces
|
| 8 |
try:
|
| 9 |
from scripts.face_detection_insightface import init_insightface, detect_faces_insightface, crop_and_resize_insightface
|
|
|
|
| 12 |
INSIGHTFACE_AVAILABLE = False
|
| 13 |
print("InsightFace not found or error importing. Install with: pip install insightface onnxruntime-gpu")
|
| 14 |
|
| 15 |
+
|
| 16 |
+
# Global cache for the (encoder_name, preset) pair chosen by get_best_encoder()
CACHED_ENCODER = None

# Hardware encoder candidates in priority order:
# NVENC (NVIDIA) > AMF (AMD) > QSV (Intel) > VideoToolbox (macOS).
# Each entry is (ffmpeg encoder name, preset returned to callers, log label).
_HW_ENCODER_CANDIDATES = (
    ("h264_nvenc", "fast", "NVIDIA (h264_nvenc)"),  # p1-p7 presets could be used but 'fast' maps well
    ("h264_amf", "speed", "AMD (h264_amf)"),  # quality, speed, balanced
    ("h264_qsv", "veryfast", "Intel QSV (h264_qsv)"),
    ("h264_videotoolbox", "default", "MacOS (h264_videotoolbox)"),
)


def _encoder_is_usable(encoder_name, timeout=15):
    """Return True if ffmpeg can really encode one synthetic frame with encoder_name.

    `ffmpeg -encoders` only reports what was compiled into the binary; a build
    can list h264_nvenc on a machine that has no NVIDIA GPU or driver. Encoding
    a single generated frame to the null muxer exercises the actual device.
    """
    probe_cmd = [
        'ffmpeg', '-hide_banner', '-loglevel', 'error',
        '-f', 'lavfi', '-i', 'color=c=black:s=256x256:r=25',
        '-frames:v', '1',
        '-c:v', encoder_name,
        '-f', 'null', '-',
    ]
    try:
        probe = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=timeout)
    except (OSError, subprocess.SubprocessError, ValueError):
        # ffmpeg missing, probe hung past the timeout, or undecodable output.
        return False
    return probe.returncode == 0


def get_best_encoder():
    """Pick the H.264 encoder to pass to ffmpeg and cache the decision.

    Returns:
        A tuple ``(encoder_name, preset)``. A hardware encoder is chosen only
        if ffmpeg lists it AND a one-frame test encode succeeds; otherwise the
        CPU fallback ``("libx264", "ultrafast")`` is returned. The result is
        stored in the module-level CACHED_ENCODER, so ffmpeg is probed at most
        once per process.
    """
    global CACHED_ENCODER
    if CACHED_ENCODER:
        return CACHED_ENCODER

    listed_encoders = ""
    try:
        # Check available encoders
        result = subprocess.run(
            ['ffmpeg', '-hide_banner', '-encoders'],
            capture_output=True, text=True, timeout=30,
        )
        listed_encoders = result.stdout or ""
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # Best effort: any failure here simply means we use the CPU encoder.
        print(f"Error checking encoders: {e}")

    for encoder_name, encoder_preset, label in _HW_ENCODER_CANDIDATES:
        if encoder_name not in listed_encoders:
            continue
        if not _encoder_is_usable(encoder_name):
            # Compiled in, but no working device/driver behind it.
            print(f"Encoder {encoder_name} is listed by ffmpeg but failed a test encode; skipping")
            continue
        print(f"Encoder Detected: {label}")
        CACHED_ENCODER = (encoder_name, encoder_preset)
        return CACHED_ENCODER

    print("Encoder Detected: CPU (libx264)")
    CACHED_ENCODER = ("libx264", "ultrafast")
    return CACHED_ENCODER
|
| 56 |
+
|
| 57 |
def get_center_bbox(bbox):
    """Return the (x, y) midpoint of a box given as [x1, y1, x2, y2]."""
    left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2
    return (center_x, center_y)
|
|
|
|
| 94 |
|
| 95 |
return new_faces
|
| 96 |
|
| 97 |
+
def generate_short_fallback(input_file, output_file, index, project_folder, final_folder, no_face_mode="padding"):
|
| 98 |
+
"""Fallback function: Center Crop (Zoom) or Padding if detection fails."""
|
| 99 |
+
print(f"Processing (Fallback): {input_file} | Mode: {no_face_mode}")
|
| 100 |
cap = cv2.VideoCapture(input_file)
|
| 101 |
if not cap.isOpened():
|
| 102 |
print(f"Error opening video: {input_file}")
|
|
|
|
| 107 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 108 |
|
| 109 |
# Target dimensions (9:16)
|
| 110 |
+
|
| 111 |
target_width = 1080
|
| 112 |
target_height = 1920
|
| 113 |
|
| 114 |
+
encoder_name, encoder_preset = get_best_encoder()
|
| 115 |
+
|
| 116 |
# Use FFmpeg Pipe instead of cv2.VideoWriter to avoid OpenCV backend errors
|
| 117 |
ffmpeg_cmd = [
|
| 118 |
'ffmpeg', '-y', '-loglevel', 'error', '-hide_banner', '-stats',
|
|
|
|
| 122 |
'-pix_fmt', 'bgr24',
|
| 123 |
'-r', str(fps),
|
| 124 |
'-i', '-',
|
| 125 |
+
'-c:v', encoder_name,
|
| 126 |
+
'-preset', encoder_preset,
|
| 127 |
'-pix_fmt', 'yuv420p',
|
| 128 |
output_file
|
| 129 |
]
|
| 130 |
|
| 131 |
+
# If using hardware encoder, we might want to set bitrate to ensure quality
|
| 132 |
+
if "nvenc" in encoder_name or "amf" in encoder_name:
|
| 133 |
+
ffmpeg_cmd.extend(["-b:v", "5M"])
|
| 134 |
+
|
| 135 |
process = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)
|
| 136 |
|
| 137 |
while True:
|
|
|
|
| 139 |
if not ret:
|
| 140 |
break
|
| 141 |
|
| 142 |
+
if no_face_mode == "zoom":
|
| 143 |
+
result = crop_center_zoom(frame)
|
| 144 |
+
else:
|
| 145 |
+
result = resize_with_padding(frame)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
try:
|
| 148 |
# Write raw bytes to ffmpeg stdin
|
| 149 |
+
process.stdin.write(result.tobytes())
|
| 150 |
except Exception as e:
|
| 151 |
print(f"Error writing frame to ffmpeg pipe: {e}")
|
| 152 |
pass
|
| 153 |
+
|
| 154 |
+
|
| 155 |
|
| 156 |
cap.release()
|
| 157 |
process.stdin.close()
|
|
|
|
| 167 |
|
| 168 |
if os.path.exists(audio_file) and os.path.getsize(audio_file) > 0:
|
| 169 |
final_output = os.path.join(final_folder, f"final-output{str(index).zfill(3)}_processed.mp4")
|
| 170 |
+
encoder_name, encoder_preset = get_best_encoder()
|
| 171 |
command = [
|
| 172 |
"ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats",
|
| 173 |
"-i", output_file,
|
| 174 |
"-i", audio_file,
|
| 175 |
+
"-c:v", encoder_name, "-preset", encoder_preset, "-b:v", "5M",
|
| 176 |
"-c:a", "aac", "-b:a", "192k",
|
| 177 |
"-r", str(fps),
|
| 178 |
final_output
|
|
|
|
| 222 |
|
| 223 |
return h / w
|
| 224 |
|
| 225 |
+
def generate_short_mediapipe(input_file, output_file, index, face_mode, project_folder, final_folder, face_detection, face_mesh, pose, detection_period=None, no_face_mode="padding"):
|
| 226 |
try:
|
| 227 |
cap = cv2.VideoCapture(input_file)
|
| 228 |
if not cap.isOpened():
|
|
|
|
| 249 |
|
| 250 |
last_detected_faces = None
|
| 251 |
last_frame_face_positions = None
|
| 252 |
+
last_success_frame = -1000
|
| 253 |
+
max_frames_without_detection = int(3.0 * fps) # 3 seconds timeout
|
| 254 |
|
| 255 |
transition_duration = int(fps)
|
| 256 |
transition_frames = []
|
|
|
|
| 306 |
else:
|
| 307 |
transition_frames = []
|
| 308 |
last_detected_faces = current_detections
|
| 309 |
+
last_success_frame = frame_index
|
| 310 |
else:
|
| 311 |
+
pass
|
| 312 |
|
| 313 |
# Update next detection frame
|
| 314 |
step = 5
|
|
|
|
| 331 |
if len(transition_frames) > 0:
|
| 332 |
current_faces = transition_frames[0]
|
| 333 |
transition_frames = transition_frames[1:]
|
| 334 |
+
elif last_detected_faces is not None and (frame_index - last_success_frame) <= max_frames_without_detection:
|
| 335 |
current_faces = last_detected_faces
|
| 336 |
else:
|
| 337 |
+
if no_face_mode == "zoom":
|
| 338 |
+
result = crop_center_zoom(frame)
|
| 339 |
+
else:
|
| 340 |
+
result = resize_with_padding(frame)
|
| 341 |
coordinate_log.append({"frame": frame_index, "faces": []})
|
| 342 |
out.write(result)
|
| 343 |
continue
|
|
|
|
| 353 |
f = current_faces[0]
|
| 354 |
result = crop_and_resize_single_face(frame, f)
|
| 355 |
else:
|
| 356 |
+
if no_face_mode == "zoom":
|
| 357 |
+
result = crop_center_zoom(frame)
|
| 358 |
+
else:
|
| 359 |
+
result = resize_with_padding(frame)
|
| 360 |
|
| 361 |
out.write(result)
|
| 362 |
|
|
|
|
| 369 |
print(f"Error in MediaPipe processing: {e}")
|
| 370 |
raise e # Rethrow to trigger fallback
|
| 371 |
|
| 372 |
+
def generate_short_haar(input_file, output_file, index, project_folder, final_folder, detection_period=None, no_face_mode="padding"):
|
| 373 |
"""Face detection using OpenCV Haar Cascades."""
|
| 374 |
print(f"Processing (Haar Cascade): {input_file}")
|
| 375 |
|
|
|
|
| 398 |
detection_interval = max(1, int(detection_period * fps))
|
| 399 |
last_detected_faces = None
|
| 400 |
last_frame_face_positions = None
|
| 401 |
+
last_success_frame = -1000
|
| 402 |
+
max_frames_without_detection = int(3.0 * fps)
|
| 403 |
|
| 404 |
transition_duration = int(fps) # 1 second smooth transition
|
| 405 |
transition_frames = []
|
|
|
|
| 436 |
else:
|
| 437 |
transition_frames = []
|
| 438 |
last_detected_faces = detections
|
| 439 |
+
last_success_frame = frame_index
|
| 440 |
else:
|
| 441 |
+
pass
|
| 442 |
|
| 443 |
if len(transition_frames) > 0:
|
| 444 |
current_faces = transition_frames[0]
|
| 445 |
transition_frames = transition_frames[1:]
|
| 446 |
+
elif last_detected_faces is not None and (frame_index - last_success_frame) <= max_frames_without_detection:
|
| 447 |
current_faces = last_detected_faces
|
| 448 |
else:
|
| 449 |
# No face detected for a while -> Center/Padding fallback
|
| 450 |
+
if no_face_mode == "zoom":
|
| 451 |
+
result = crop_center_zoom(frame)
|
| 452 |
+
else:
|
| 453 |
+
result = resize_with_padding(frame)
|
| 454 |
out.write(result)
|
| 455 |
continue
|
| 456 |
|
|
|
|
| 474 |
|
| 475 |
finalize_video(input_file, output_file, index, fps, project_folder, final_folder)
|
| 476 |
|
| 477 |
+
def generate_short_insightface(input_file, output_file, index, project_folder, final_folder, face_mode="auto", detection_period=None, filter_threshold=0.35, two_face_threshold=0.60, confidence_threshold=0.30, dead_zone=40, focus_active_speaker=False, active_speaker_mar=0.03, active_speaker_score_diff=1.5, include_motion=False, active_speaker_motion_deadzone=3.0, active_speaker_motion_sensitivity=0.05, active_speaker_decay=2.0, no_face_mode="padding"):
|
| 478 |
"""Face detection using InsightFace (SOTA)."""
|
| 479 |
print(f"Processing (InsightFace): {input_file} | Mode: {face_mode}")
|
| 480 |
|
|
|
|
| 497 |
|
| 498 |
last_detected_faces = None
|
| 499 |
last_frame_face_positions = None
|
| 500 |
+
last_success_frame = -1000
|
| 501 |
+
max_frames_without_detection = int(3.0 * fps) # 3 seconds timeout
|
| 502 |
|
| 503 |
transition_duration = 4 # Smooth transition over 4 frames (almost continuous)
|
| 504 |
transition_frames = []
|
|
|
|
| 912 |
# Reset transition if face count changed or first detect
|
| 913 |
transition_frames = []
|
| 914 |
last_detected_faces = detections
|
| 915 |
+
last_success_frame = frame_index
|
| 916 |
else:
|
| 917 |
+
pass
|
| 918 |
+
|
| 919 |
|
| 920 |
# Update next detection frame based on NEW state
|
| 921 |
step = 5 # Default fallback (very fast)
|
|
|
|
| 940 |
if len(transition_frames) > 0:
|
| 941 |
current_faces = transition_frames[0]
|
| 942 |
transition_frames = transition_frames[1:]
|
| 943 |
+
elif last_detected_faces is not None and (frame_index - last_success_frame) <= max_frames_without_detection:
|
| 944 |
current_faces = last_detected_faces
|
| 945 |
else:
|
| 946 |
# Fallback for this frame
|
| 947 |
+
if no_face_mode == "zoom":
|
| 948 |
+
result = crop_center_zoom(frame)
|
| 949 |
+
else:
|
| 950 |
+
result = resize_with_padding(frame)
|
| 951 |
out.write(result)
|
| 952 |
continue
|
| 953 |
|
|
|
|
| 1061 |
return "1"
|
| 1062 |
|
| 1063 |
|
| 1064 |
+
def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detection_period=None, filter_threshold=0.35, two_face_threshold=0.60, confidence_threshold=0.30, dead_zone=40, focus_active_speaker=False, active_speaker_mar=0.03, active_speaker_score_diff=1.5, include_motion=False, active_speaker_motion_deadzone=3.0, active_speaker_motion_sensitivity=0.05, active_speaker_decay=2.0, segments_data=None, no_face_mode="padding"):
|
| 1065 |
# Lazy init solutions only when needed to avoid AttributeError if import failed partially
|
| 1066 |
mp_face_detection = None
|
| 1067 |
mp_face_mesh = None
|
|
|
|
| 1162 |
active_speaker_mar=active_speaker_mar, active_speaker_score_diff=active_speaker_score_diff, include_motion=include_motion,
|
| 1163 |
active_speaker_motion_deadzone=active_speaker_motion_deadzone,
|
| 1164 |
active_speaker_motion_sensitivity=active_speaker_motion_sensitivity,
|
| 1165 |
+
active_speaker_decay=active_speaker_decay,
|
| 1166 |
+
no_face_mode=no_face_mode)
|
| 1167 |
if res: detected_mode = res
|
| 1168 |
success = True
|
| 1169 |
except Exception as e:
|
|
|
|
| 1179 |
mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=2, refine_landmarks=True, min_detection_confidence=0.2, min_tracking_confidence=0.2) as face_mesh, \
|
| 1180 |
mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
|
| 1181 |
|
| 1182 |
+
generate_short_mediapipe(input_file, output_file, index, face_mode, project_folder, final_folder, face_detection, face_mesh, pose, detection_period=detection_period, no_face_mode=no_face_mode)
|
| 1183 |
# We don't easily know detected mode here without return, assuming '1' or '2' based on last frame?
|
| 1184 |
# Ideally function should return as well.
|
| 1185 |
detected_mode = "1" # Placeholder, user didn't complain about stats.
|
|
|
|
| 1194 |
if not success and (use_haar or (not mediapipe_working and not insightface_working)):
|
| 1195 |
try:
|
| 1196 |
print("Attempts with Haar Cascade...")
|
| 1197 |
+
generate_short_haar(input_file, output_file, index, project_folder, final_folder, detection_period=detection_period, no_face_mode=no_face_mode)
|
| 1198 |
success = True
|
| 1199 |
except Exception as e2:
|
| 1200 |
print(f"Haar fallback also failed: {e2}")
|
| 1201 |
|
| 1202 |
# 4. Last resort: face-less fallback (center zoom or padding, chosen by no_face_mode)
|
| 1203 |
if not success:
|
| 1204 |
+
generate_short_fallback(input_file, output_file, index, project_folder, final_folder, no_face_mode=no_face_mode)
|
| 1205 |
detected_mode = "1"
|
| 1206 |
success = True
|
| 1207 |
|
scripts/one_face.py
CHANGED
|
@@ -107,3 +107,33 @@ def detect_face_or_body(frame, face_detection, face_mesh, pose):
|
|
| 107 |
# If nothing was detected, return None (not an empty list)
|
| 108 |
return detections if detections else None
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
# If nothing was detected, return None (not an empty list)
|
| 108 |
return detections if detections else None
|
| 109 |
|
| 110 |
+
|
| 111 |
+
def crop_center_zoom(frame, target_width=1080, target_height=1920):
    """Crop the centre of ``frame`` to the target aspect ratio and resize it (zoom effect).

    Args:
        frame: Image array whose first two ``shape`` entries are (height, width).
        target_width: Output width in pixels (default 1080).
        target_height: Output height in pixels (default 1920, i.e. 9:16).

    Returns:
        The centred crop resized to ``target_width`` x ``target_height``.

    Raises:
        ValueError: If the frame has a zero-sized height or width.
    """
    frame_height, frame_width = frame.shape[:2]
    if frame_height <= 0 or frame_width <= 0:
        raise ValueError("crop_center_zoom received an empty frame")

    target_aspect_ratio = target_width / target_height

    # Calculate crop dimensions to FILL the target ratio
    if frame_width / frame_height > target_aspect_ratio:
        # Source is wider than target (e.g. 16:9 source, 9:16 target) -> Crop Width
        new_width = max(1, int(frame_height * target_aspect_ratio))
        new_height = frame_height
    else:
        # Source is taller than target -> Crop Height
        new_width = frame_width
        new_height = max(1, int(frame_width / target_aspect_ratio))

    # Centre the crop window; clamp to 0 in case rounding pushed it negative.
    start_x = max(0, (frame_width - new_width) // 2)
    start_y = max(0, (frame_height - new_height) // 2)

    crop_img = frame[start_y:start_y + new_height, start_x:start_x + new_width]

    # INTER_AREA is meant for shrinking; when enlarging it degrades to
    # nearest-neighbour-like output, so use bilinear whenever the crop is
    # smaller than the target in either dimension (e.g. 607x1080 -> 1080x1920).
    is_shrinking = new_width >= target_width and new_height >= target_height
    interpolation = cv2.INTER_AREA if is_shrinking else cv2.INTER_LINEAR

    return cv2.resize(crop_img, (target_width, target_height), interpolation=interpolation)
|
| 139 |
+
|