RafaG committed on
Commit
ac8854d
·
verified ·
1 Parent(s): 1934649

Upload 24 files

Browse files
scripts/create_viral_segments.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  import re
4
  import sys
5
  import time
 
6
 
7
  # Tenta importar bibliotecas de IA opcionalmente
8
  try:
@@ -11,6 +12,9 @@ try:
11
  except ImportError:
12
  HAS_GEMINI = False
13
 
 
 
 
14
  except ImportError:
15
  HAS_G4F = False
16
 
@@ -21,37 +25,108 @@ except ImportError:
21
  HAS_LLAMA_CPP = False
22
 
23
  def clean_json_response(response_text):
24
- """Limpa blocos de código markdown do texto de resposta."""
 
 
 
 
 
 
25
  if not response_text:
26
  return {"segments": []}
27
- # Remove ```json ... ```
28
- # First, try to remove ```json ... ``` or just ``` ... ```
29
- pattern = r"```json(.*?)```"
30
- match = re.search(pattern, response_text, re.DOTALL)
31
- if match:
32
- response_text = match.group(1)
33
- else:
34
- pattern_generic = r"```(.*?)```"
35
- match_generic = re.search(pattern_generic, response_text, re.DOTALL)
36
- if match_generic:
37
- response_text = match_generic.group(1)
38
-
39
- # Always attempt to extract from outermost curly braces,
40
- # as some models chatter before/after the code block
41
- start_idx = response_text.find("{")
42
- end_idx = response_text.rfind("}")
43
 
44
- if start_idx != -1 and end_idx != -1:
45
- response_text = response_text[start_idx : end_idx + 1]
 
 
 
 
 
 
 
 
 
46
 
47
- return json.loads(response_text.strip())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
 
50
  def preprocess_transcript_for_ai(segments):
51
  """
52
  Concatenates transcript segments into a single string with embedded time tags.
53
- Tags are inserted at the beginning (0s) and roughly every 4 seconds thereafter.
54
- Format: "Word word word (4s) word word..."
55
  """
56
  if not segments:
57
  return ""
@@ -68,10 +143,8 @@ def preprocess_transcript_for_ai(segments):
68
  text = seg.get('text', '').strip()
69
  end_time = seg.get('end', 0)
70
 
71
- # Add text
72
  full_text += text + " "
73
 
74
- # Add tag if ~4 seconds passed since last tag
75
  if end_time - last_tag_time >= 4:
76
  full_text += f"({int(end_time)}s) "
77
  last_tag_time = end_time
@@ -96,12 +169,11 @@ def call_gemini(prompt, api_key, model_name='gemini-2.5-flash-lite-preview-09-20
96
  except Exception as e:
97
  error_str = str(e)
98
  if "429" in error_str or "Quota exceeded" in error_str:
99
- wait_time = base_wait * (attempt + 1) # Backoff default
100
 
101
- # Try to find specific wait time in error message
102
  match = re.search(r"retry in (\d+(\.\d+)?)s", error_str)
103
  if match:
104
- wait_time = float(match.group(1)) + 5.0 # Add 5s buffer
105
 
106
  print(f"[429] Quota Exceeded. Waiting {wait_time:.2f}s before retry {attempt+1}/{max_retries}...", flush=True)
107
  time.sleep(wait_time)
@@ -117,25 +189,53 @@ def call_g4f(prompt, model_name="gpt-4o-mini"):
117
  if not HAS_G4F:
118
  raise ImportError("A biblioteca 'g4f' não está instalada. Instale com: pip install g4f")
119
 
120
- try:
121
- # Tenta usar um provider automático
122
- response = g4f.ChatCompletion.create(
123
- model=model_name,
124
- messages=[{"role": "user", "content": prompt}],
125
- )
126
- return response
127
- except Exception as e:
128
- print(f"Erro na API do G4F: {e}")
129
- return "{}"
130
-
131
- def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode="manual", api_key=None, project_folder="tmp", chunk_size_arg=None, model_name_arg=None):
132
- quantidade_de_virals = num_segments
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
- # Ler transcrição
 
135
  input_tsv = os.path.join(project_folder, 'input.tsv')
136
  input_srt = os.path.join(project_folder, 'input.srt')
137
 
138
- # Parse Input into Segments first
139
  transcript_segments = []
140
 
141
  # Try to load TSV first (more reliable time)
@@ -162,8 +262,6 @@ def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode
162
  if not transcript_segments and os.path.exists(input_srt):
163
  with open(input_srt, 'r', encoding='utf-8') as f:
164
  srt_content = f.read()
165
- # Simple SRT Regex Parser
166
- # Matches: index, time range, text
167
  pattern = re.compile(r'(\d+)\n(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})\n((?:(?!\n\n).)*)', re.DOTALL)
168
  matches = pattern.findall(srt_content)
169
 
@@ -179,11 +277,187 @@ def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode
179
 
180
  if not transcript_segments:
181
  raise ValueError("Could not parse transcript from TSV or SRT.")
 
 
182
 
183
- # Generate Pre-processed Content with Time Tags
184
- formatted_content = preprocess_transcript_for_ai(transcript_segments)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
- # Use formatted content for chunking
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  content = formatted_content
188
 
189
  # Load Config and Prompt
@@ -191,7 +465,6 @@ def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode
191
  config_path = os.path.join(base_dir, 'api_config.json')
192
  prompt_path = os.path.join(base_dir, 'prompt.txt')
193
 
194
- # Default Config
195
  config = {
196
  "selected_api": "gemini",
197
  "gemini": {
@@ -209,38 +482,31 @@ def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode
209
  try:
210
  with open(config_path, 'r', encoding='utf-8') as f:
211
  loaded_config = json.load(f)
212
- # Merge simples
213
  if "gemini" in loaded_config: config["gemini"].update(loaded_config["gemini"])
214
  if "g4f" in loaded_config: config["g4f"].update(loaded_config["g4f"])
215
  if "selected_api" in loaded_config: config["selected_api"] = loaded_config["selected_api"]
216
  except Exception as e:
217
- print(f"Erro ao ler api_config.json: {e}. Usando padrões.")
218
 
219
- # Configurar variaveis baseadas no ai_mode
220
- current_chunk_size = 15000 # default fallback
221
  model_name = ""
222
 
223
  if ai_mode == "gemini":
224
  cfg_chunk = config["gemini"].get("chunk_size", 15000)
225
  current_chunk_size = chunk_size_arg if chunk_size_arg and int(chunk_size_arg) > 0 else cfg_chunk
226
-
227
  cfg_model = config["gemini"].get("model", "gemini-2.5-flash-lite-preview-09-2025")
228
  model_name = model_name_arg if model_name_arg else cfg_model
229
-
230
- if not api_key: # Se não veio por argumento, tenta do config
231
- api_key = config["gemini"].get("api_key", "")
232
 
233
  elif ai_mode == "g4f":
234
  cfg_chunk = config["g4f"].get("chunk_size", 2000)
235
  current_chunk_size = chunk_size_arg if chunk_size_arg and int(chunk_size_arg) > 0 else cfg_chunk
236
-
237
  cfg_model = config["g4f"].get("model", "gpt-4o-mini")
238
  model_name = model_name_arg if model_name_arg else cfg_model
239
 
240
  elif ai_mode == "local":
241
- # For local, chunk size default 3000 chars roughly matches 1024-2048 tokens depending on chars/token
242
  current_chunk_size = chunk_size_arg if chunk_size_arg and int(chunk_size_arg) > 0 else 3000
243
- # Model name is just the argument (filename)
244
  model_name = model_name_arg if model_name_arg else ""
245
 
246
  system_prompt_template = ""
@@ -248,12 +514,11 @@ def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode
248
  with open(prompt_path, 'r', encoding='utf-8') as f:
249
  system_prompt_template = f.read()
250
  else:
251
- # Fallback se arquivo nao existir
252
  print("Aviso: prompt.txt não encontrado. Usando prompt interno.")
253
  system_prompt_template = """You are a World-Class Viral Video Editor.
254
  {context_instruction}
255
  Analyze the transcript below with time tags (XXs). Find {amount} viral segments.
256
- Constraints: {min_duration}s - {max_duration}s.
257
  IMPORTANT: Output "Title", "Hook", and "Reasoning" in the SAME LANGUAGE as the transcript (e.g., if transcript is Portuguese, output Portuguese).
258
  TRANSCRIPT:
259
  {transcript_chunk}
@@ -276,11 +541,8 @@ OUTPUT JSON ONLY:
276
  }
277
  '''
278
 
279
- # Split content into chunks
280
- # Split content into chunks with OVERLAP
281
  chunk_size = int(current_chunk_size)
282
-
283
- # Define overlap size (e.g. 10% of chunk size or min 1000 chars)
284
  overlap_size = max(1000, int(chunk_size * 0.1))
285
 
286
  chunks = []
@@ -291,27 +553,16 @@ OUTPUT JSON ONLY:
291
 
292
  while start < content_len:
293
  end = min(start + chunk_size, content_len)
294
-
295
- # Align End to newline to avoid cutting sentences is useless here since we process raw text line.
296
- # But our `formatted_content` has newlines from preprocess? Actually `preprocess_transcript_for_ai` concats with " ".
297
- # So we look for space.
298
-
299
  if end < content_len:
300
  last_space = content.rfind(' ', start, end)
301
  if last_space != -1 and last_space > start:
302
  end = last_space
303
-
304
  chunk_text = content[start:end]
305
- if chunk_text.strip(): # Avoid empty chunks
306
  chunks.append(chunk_text)
307
-
308
  if end >= content_len:
309
  break
310
-
311
- # Prepare start for next chunk (Backtrack by overlap)
312
  next_start = max(start + 1, end - overlap_size)
313
-
314
- # Align next_start to space
315
  safe_space = content.rfind(' ', start, next_start)
316
  if safe_space != -1:
317
  start = safe_space + 1
@@ -329,7 +580,6 @@ OUTPUT JSON ONLY:
329
  if len(chunks) > 1:
330
  context_instruction = f"Part {i+1} of {len(chunks)}. "
331
 
332
- # Preencher o template
333
  try:
334
  prompt = system_prompt_template.format(
335
  context_instruction=context_instruction,
@@ -338,12 +588,9 @@ OUTPUT JSON ONLY:
338
  max_duration=tempo_maximo,
339
  transcript_chunk=chunk,
340
  json_template=json_template,
341
- amount=quantidade_de_virals # Caso o user use {amount} no txt
342
  )
343
  except KeyError as e:
344
- # Fallback se o user bagunçou o txt e esqueceu chaves ou colocou chaves erradas
345
- # Tenta um replace manual basico ou avisa erro, mas ideal é não quebrar.
346
- # Vamos usar replace seguro
347
  prompt = system_prompt_template
348
  prompt = prompt.replace("{context_instruction}", context_instruction)
349
  prompt = prompt.replace("{virality_instruction}", virality_instruction)
@@ -355,31 +602,26 @@ OUTPUT JSON ONLY:
355
 
356
  output_texts.append(prompt)
357
 
358
- # --- Save Full Prompt for Reference ---
359
  try:
360
  full_prompt_path = os.path.join(project_folder, "prompt_full.txt")
361
- # Prepare full prompt using replace to be safe
362
  full_prompt = system_prompt_template
363
  full_prompt = full_prompt.replace("{context_instruction}", "Full Video Transcript Analysis")
364
  full_prompt = full_prompt.replace("{virality_instruction}", virality_instruction)
365
  full_prompt = full_prompt.replace("{min_duration}", str(tempo_minimo))
366
  full_prompt = full_prompt.replace("{max_duration}", str(tempo_maximo))
367
- full_prompt = full_prompt.replace("{transcript_chunk}", content) # Full Content
368
  full_prompt = full_prompt.replace("{json_template}", json_template)
369
  full_prompt = full_prompt.replace("{amount}", str(quantidade_de_virals))
370
 
371
  with open(full_prompt_path, "w", encoding="utf-8") as f:
372
  f.write(full_prompt)
373
- # print(f"[INFO] Full reference prompt saved to: {full_prompt_path}")
374
  except Exception as e:
375
  print(f"[WARN] Could not save prompt_full.txt: {e}")
376
- # -------------------------------------
377
 
378
- all_segments = []
379
 
380
  print(f"Processando {len(output_texts)} chunks usando modo: {ai_mode.upper()}")
381
 
382
- # Initialize Local Model if needed (Once)
383
  local_llm_instance = None
384
  if ai_mode == "local":
385
  if not HAS_LLAMA_CPP:
@@ -387,10 +629,9 @@ OUTPUT JSON ONLY:
387
  return {"segments": []}
388
 
389
  models_dir = os.path.join(base_dir, 'models')
390
- # Check if model_name is full path or filename
391
  model_path = os.path.join(models_dir, model_name)
392
  if not os.path.exists(model_path):
393
- if os.path.exists(model_name): # Absolute path check
394
  model_path = model_name
395
  else:
396
  print(f"Error: Model not found at {model_path}")
@@ -398,7 +639,6 @@ OUTPUT JSON ONLY:
398
 
399
  print(f"[INFO] Loading Local Model: {os.path.basename(model_path)} (This may take a while)...")
400
  try:
401
- # Adjust n_gpu_layers=-1 for max GPU usage. n_ctx=8192 for long context.
402
  local_llm_instance = Llama(
403
  model_path=model_path,
404
  n_gpu_layers=-1,
@@ -411,8 +651,6 @@ OUTPUT JSON ONLY:
411
 
412
  for i, prompt in enumerate(output_texts):
413
  response_text = ""
414
-
415
- # Always save prompt to file (Manual, Gemini, or G4F)
416
  manual_prompt_path = os.path.join(project_folder, f"prompt_part_{i+1}.txt")
417
  try:
418
  with open(manual_prompt_path, "w", encoding="utf-8") as f:
@@ -422,7 +660,6 @@ OUTPUT JSON ONLY:
422
 
423
  if ai_mode == "manual":
424
  print(f"\n[INFO] O prompt foi salvo em: {manual_prompt_path}")
425
-
426
  print("\n" + "="*60)
427
  print(f"CHUNK {i+1}/{len(output_texts)}")
428
  print("="*60)
@@ -446,11 +683,10 @@ OUTPUT JSON ONLY:
446
  print(f"Arquivo {response_json_path} não encontrado.")
447
  else:
448
  response_text = user_input
449
- # Tenta ler mais linhas se parecer incompleto (bruteforce simples)
450
  if response_text.strip().startswith("{") and not response_text.strip().endswith("}"):
451
  print("Parece incompleto. Cole o resto e dê Enter (ou Ctrl+C para cancelar):")
452
  try:
453
- rest = sys.stdin.read() # Isso pode travar no Windows sem EOF explícito
454
  response_text += rest
455
  except:
456
  pass
@@ -458,15 +694,12 @@ OUTPUT JSON ONLY:
458
  elif ai_mode == "gemini":
459
  print(f"Enviando chunk {i+1} para o Gemini (Model: {model_name})...")
460
  response_text = call_gemini(prompt, api_key, model_name=model_name)
461
-
462
  elif ai_mode == "g4f":
463
  print(f"Enviando chunk {i+1} para o G4F (Model: {model_name})...")
464
  response_text = call_g4f(prompt, model_name=model_name)
465
-
466
  elif ai_mode == "local" and local_llm_instance:
467
  print(f"Processing chunk {i+1} with Local LLM...")
468
  try:
469
- # Use chat completion for better formatting handling
470
  output = local_llm_instance.create_chat_completion(
471
  messages=[
472
  {"role": "system", "content": "You are a helpful assistant that outputs only JSON."},
@@ -488,214 +721,23 @@ OUTPUT JSON ONLY:
488
  print(f"[DEBUG] Raw response saved to: {raw_response_path}")
489
  except Exception as e:
490
  print(f"[WARN] Failed to save raw response: {e}")
491
- # ----------------------------------------
492
 
493
  # Processar resposta
494
  try:
495
  data = clean_json_response(response_text)
496
  chunk_segments = data.get("segments", [])
497
  print(f"Encontrados {len(chunk_segments)} segmentos neste chunk.")
498
- all_segments.extend(chunk_segments)
499
  except json.JSONDecodeError:
500
- print(f"Erro: Resposta inválida (não é JSON válida).")
501
- print(f"Conteúdo recebido (primeiros 100 chars): {response_text[:100]}...")
502
  except Exception as e:
503
  print(f"Erro desconhecido ao processar chunk: {e}")
504
 
505
- # Sort segments by score (descending) to get the best ones globally
506
- try:
507
- all_segments.sort(key=lambda x: int(x.get('score', 0)), reverse=True)
508
- except:
509
- pass # If scores are not valid integers, skip sorting or rely on order
510
-
511
- # --- POST-PROCESSING: Match Text to Timestamps ---
512
- processed_segments = []
513
-
514
- # Helper to find text in segments
515
- def find_timestamp_by_text(target_text, segments_list, start_search_idx=0, is_end=False):
516
- # Normalize target
517
- target_clean = "".join(target_text.lower().split())
518
- if not target_clean: return None, start_search_idx
519
-
520
- current_concat = ""
521
- param_idx = -1
522
-
523
- # Sliding window or simple linear scan?
524
- # Linear scan matches sequences of words.
525
- # We look for the FIRST occurrence of target_text in segments_list starting from start_search_idx
526
-
527
- # Optimization: Create a long string of remaining segments and find index, then map back?
528
- # Better: iterate segments.
529
-
530
- for i in range(start_search_idx, len(segments_list)):
531
- seg_text = segments_list[i]['text']
532
- # We treat this simple: check if target is basically inside this segment or spanning a few.
533
- # Since target is "5-10 words", it might span 2 segments.
534
-
535
- # Simple approach: Check if target (normalized) is substring of
536
- # (prev + current + next) normalized.
537
- # This is complex.
538
-
539
- # SIMPLER APPROACH:
540
- # The AI returns 'start_time_ref' (e.g., "(12s)").
541
- # We jump to that time in segments_list.
542
- # Then we look for the text in that vicinity.
543
- pass
544
-
545
- return None, -1
546
-
547
- # SIMPLIFIED MATCHING LOGIC
548
- # 1. Use 'start_time_ref' to find approximate index.
549
- # 2. Search locally for 'start_text'.
550
- # 3. Search forward for 'end_text'.
551
-
552
- print(f"[DEBUG] Matching {len(all_segments)} raw segments to timestamps...")
553
-
554
- for seg in all_segments:
555
- try:
556
- # 1. Parse Reference Time
557
- ref_time_str = seg.get('start_time_ref', '(0s)')
558
- ref_time_val = 0
559
- try:
560
- ref_time_val = int(re.search(r'\d+', ref_time_str).group())
561
- except:
562
- ref_time_val = 0
563
-
564
- # Find segment index closest to ref_time
565
- start_idx = 0
566
- min_diff = 999999
567
- for i, s in enumerate(transcript_segments):
568
- diff = abs(s['start'] - ref_time_val)
569
- if diff < min_diff:
570
- min_diff = diff
571
- start_idx = i
572
- if s['start'] > ref_time_val + 10: # Stop if we went too far
573
- break
574
-
575
- # Backtrack a bit in case Ref was slightly off or text started earlier
576
- start_idx = max(0, start_idx - 5)
577
-
578
- # 2. Find Exact Start Text
579
- start_text_target = seg.get('start_text', '').lower().strip()
580
- # Normalize: remove punctuation
581
- start_text_target = re.sub(r'[^\w\s]', '', start_text_target)
582
-
583
- final_start_time = -1
584
- match_start_idx = -1
585
-
586
- # Search window: forward 50 segments
587
- search_limit = min(len(transcript_segments), start_idx + 50)
588
-
589
- for i in range(start_idx, search_limit):
590
- s_text = transcript_segments[i]['text'].lower()
591
- s_text = re.sub(r'[^\w\s]', '', s_text)
592
-
593
- # Check for partial match (start of sentence)
594
- if start_text_target and (start_text_target in s_text or s_text in start_text_target):
595
- final_start_time = transcript_segments[i]['start']
596
- match_start_idx = i
597
- break
598
-
599
- # Fallback: use Ref Time if text match fails
600
- if final_start_time == -1:
601
- final_start_time = transcript_segments[start_idx]['start'] if start_idx < len(transcript_segments) else ref_time_val
602
- match_start_idx = start_idx
603
-
604
- # 3. Find End Text (starting from match_start_idx)
605
- end_text_target = seg.get('end_text', '').lower().strip()
606
- end_text_target = re.sub(r'[^\w\s]', '', end_text_target)
607
-
608
- final_end_time = -1
609
-
610
- if match_start_idx != -1:
611
- # Search forward for end text, extended range
612
- # Use a larger window but we will sanity check duration later
613
- search_end_limit = min(len(transcript_segments), match_start_idx + 200)
614
-
615
- for i in range(match_start_idx, search_end_limit):
616
- s_text = transcript_segments[i]['text'].lower()
617
- s_text = re.sub(r'[^\w\s]', '', s_text)
618
-
619
- if end_text_target and (end_text_target in s_text or s_text in end_text_target):
620
- final_end_time = transcript_segments[i]['end']
621
- break
622
-
623
- # Fallback End Time checking Duration
624
- if final_end_time == -1:
625
- final_end_time = final_start_time + tempo_minimo # safe default
626
-
627
- # Calculate Duration
628
- duration = final_end_time - final_start_time
629
-
630
- # Validate Duration (Min)
631
- if duration < 5:
632
- duration = tempo_minimo
633
- final_end_time = final_start_time + duration
634
-
635
- # Validate Duration (Max)
636
- # If AI selected start and end points that result in a huge segment, clamp it.
637
- if duration > tempo_maximo:
638
- print(f"[WARN] Segmento excede max duration ({duration:.2f}s > {tempo_maximo}s). Cortando para {tempo_maximo}s.")
639
- final_end_time = final_start_time + tempo_maximo
640
- duration = tempo_maximo
641
-
642
- # Construct Final Segment
643
- processed_segments.append({
644
- "title": seg.get('title', 'Viral Segment'),
645
- "start_time": final_start_time,
646
- "end_time": final_end_time,
647
- "hook": seg.get('title', ''), # Use title as hook text
648
- "reasoning": seg.get('reasoning', ''),
649
- "score": seg.get('score', 0),
650
- "duration": duration
651
- })
652
-
653
- except Exception as e:
654
- print(f"[WARN] Error processing segment {seg}: {e}")
655
- continue
656
-
657
- # Deduplication (Keep highest score)
658
- unique_segments = []
659
- # Sort by Score desc
660
- processed_segments.sort(key=lambda x: int(x.get('score', 0)), reverse=True)
661
-
662
- for candidate in processed_segments:
663
- is_dup = False
664
- for existing in unique_segments:
665
- s1, e1 = candidate['start_time'], candidate['end_time']
666
- s2, e2 = existing['start_time'], existing['end_time']
667
-
668
- overlap_start = max(s1, s2)
669
- overlap_end = min(e1, e2)
670
-
671
- if overlap_end > overlap_start:
672
- intersection = overlap_end - overlap_start
673
- if intersection > 5: # more than 5 seconds overlap
674
- is_dup = True
675
- break
676
- if not is_dup:
677
- unique_segments.append(candidate)
678
-
679
- all_segments = unique_segments
680
- print(f"[DEBUG] Finished processing. {len(all_segments)} segments valid.")
681
- # ---------------------------
682
-
683
- # Limit to the requested number of segments
684
- if quantidade_de_virals and len(all_segments) > quantidade_de_virals:
685
- print(f"Filtrando os top {quantidade_de_virals} segmentos de {len(all_segments)} candidatos encontrados nos chunks.")
686
- all_segments = all_segments[:quantidade_de_virals]
687
-
688
- final_result = {"segments": all_segments}
689
-
690
- # Validação básica de duração nos resultados (opcional, mas bom pra evitar erros no ffmpeg)
691
- # Convertendo milliseconds pra int se necessário, garantindo sanidade
692
- validated_segments = []
693
- for seg in final_result['segments']:
694
- # Garante start_time
695
- if 'start_time' in seg:
696
- # Deixa passar, cut_segments lida com int/str conversion
697
- validated_segments.append(seg)
698
-
699
- final_result['segments'] = validated_segments
700
-
701
- return final_result
 
3
  import re
4
  import sys
5
  import time
6
+ import ast
7
 
8
  # Tenta importar bibliotecas de IA opcionalmente
9
  try:
 
12
  except ImportError:
13
  HAS_GEMINI = False
14
 
15
+ try:
16
+ import g4f
17
+ HAS_G4F = True
18
  except ImportError:
19
  HAS_G4F = False
20
 
 
25
  HAS_LLAMA_CPP = False
26
 
27
def _find_balanced_close(text):
    """Return the index of the '}' that closes the '{' at text[0], or -1.

    Tracks WHICH quote character opened the current string literal, so an
    apostrophe inside a double-quoted string (e.g. "it's") no longer corrupts
    the brace balance (the previous toggle-on-any-quote logic did).
    """
    depth = 0
    quote = None   # quote char that opened the current string, or None
    escaped = False
    for i, ch in enumerate(text):
        if escaped:
            escaped = False
            continue
        if ch == '\\':
            escaped = True
            continue
        if quote is not None:
            if ch == quote:
                quote = None
            continue
        if ch in '"\'':
            quote = ch
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return i
    return -1


def _extract_segments_object(text):
    """Scan *text* for a decodable dict that contains a "segments" list.

    For every occurrence of the word "segments", backtrack to the nearest
    '{' (searching at most 5000 chars back, for performance) and try:
      A) strict JSON decoding via json.JSONDecoder.raw_decode;
      B) ast.literal_eval on a brace-balanced slice (handles single-quoted,
         Python-style dicts some models emit).
    Returns the parsed dict, or None when no occurrence yields one.
    """
    for match in re.finditer(r'segments', text):
        match_idx = match.start()
        open_idx = text.rfind('{', max(0, match_idx - 5000), match_idx)
        if open_idx == -1:
            continue
        candidate = text[open_idx:]

        # Attempt A: strict JSON from the opening brace onward.
        try:
            obj, _ = json.JSONDecoder().raw_decode(candidate)
            if isinstance(obj, dict) and isinstance(obj.get('segments'), list):
                return obj
        except ValueError:
            pass

        # Attempt B: Python-literal syntax on the brace-balanced slice.
        close_idx = _find_balanced_close(candidate)
        if close_idx != -1:
            try:
                obj = ast.literal_eval(candidate[:close_idx + 1])
                if isinstance(obj, dict) and isinstance(obj.get('segments'), list):
                    return obj
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                pass
    return None


def clean_json_response(response_text):
    """Extract the JSON object holding the "segments" key from a raw LLM reply.

    Strategy: strip model "thinking" tags, then locate each occurrence of the
    word "segments", backtrack to the nearest '{' and decode from there —
    first as strict JSON, then as a Python literal on a brace-balanced slice.

    Escape normalisation ("\\n" -> newline, "\\\"" -> '"') now runs only as a
    SECOND pass, after strict parsing has failed: the previous version applied
    it up front, which turned well-formed JSON containing legitimate escape
    sequences into invalid JSON (raw control characters inside strings).

    Falls back to a ```json ... ``` markdown block; returns {"segments": []}
    when nothing parses.
    """
    if not isinstance(response_text, str):
        response_text = str(response_text)

    if not response_text:
        return {"segments": []}

    # Remove "thinking" tags emitted by reasoning models (e.g. DeepSeek R1).
    response_text = re.sub(r'<think>.*?</think>', '', response_text, flags=re.DOTALL)

    # Pass 1: parse the text exactly as received.
    obj = _extract_segments_object(response_text)
    if obj is not None:
        return obj

    # Pass 2: some models double-escape their output; normalise the common
    # escapes and retry. Doing this only after the strict pass failed means
    # valid JSON escapes are never rewritten into invalid JSON.
    if '\\n' in response_text or '\\"' in response_text:
        normalized = (response_text
                      .replace('\\n', '\n')
                      .replace('\\"', '"')
                      .replace("\\'", "'"))
        obj = _extract_segments_object(normalized)
        if obj is not None:
            return obj

    # Final fallback: raw extraction from a ```json ... ``` markdown block.
    md_match = re.search(r"```json(.*?)```", response_text, re.DOTALL)
    if md_match:
        try:
            return json.loads(md_match.group(1))
        except ValueError:
            pass

    return {"segments": []}
125
 
126
 
127
  def preprocess_transcript_for_ai(segments):
128
  """
129
  Concatenates transcript segments into a single string with embedded time tags.
 
 
130
  """
131
  if not segments:
132
  return ""
 
143
  text = seg.get('text', '').strip()
144
  end_time = seg.get('end', 0)
145
 
 
146
  full_text += text + " "
147
 
 
148
  if end_time - last_tag_time >= 4:
149
  full_text += f"({int(end_time)}s) "
150
  last_tag_time = end_time
 
169
  except Exception as e:
170
  error_str = str(e)
171
  if "429" in error_str or "Quota exceeded" in error_str:
172
+ wait_time = base_wait * (attempt + 1)
173
 
 
174
  match = re.search(r"retry in (\d+(\.\d+)?)s", error_str)
175
  if match:
176
+ wait_time = float(match.group(1)) + 5.0
177
 
178
  print(f"[429] Quota Exceeded. Waiting {wait_time:.2f}s before retry {attempt+1}/{max_retries}...", flush=True)
179
  time.sleep(wait_time)
 
189
  if not HAS_G4F:
190
  raise ImportError("A biblioteca 'g4f' não está instalada. Instale com: pip install g4f")
191
 
192
+ max_retries = 3
193
+ base_wait = 5
194
+
195
+ for attempt in range(max_retries):
196
+ try:
197
+ response = g4f.ChatCompletion.create(
198
+ model=model_name,
199
+ messages=[{"role": "user", "content": prompt}],
200
+ )
201
+
202
+ if isinstance(response, dict):
203
+ if 'error' in response:
204
+ raise Exception(f"API Error: {response['error']}")
205
+ if 'choices' in response and isinstance(response['choices'], list):
206
+ if len(response['choices']) > 0:
207
+ content = response['choices'][0].get('message', {}).get('content', '')
208
+ if content:
209
+ return content
210
+ if not response:
211
+ raise ValueError("Empty Dict response")
212
+
213
+ return json.dumps(response)
214
+
215
+ if not response:
216
+ print(f"[WARN] G4F retornou resposta vazia. Tentativa {attempt+1}/{max_retries}")
217
+ time.sleep(base_wait)
218
+ continue
219
+
220
+ try:
221
+ return json.dumps(response, ensure_ascii=False)
222
+ except:
223
+ return str(response)
224
+
225
+ except Exception as e:
226
+ print(f"[WARN] Erro na API do G4F (Tentativa {attempt+1}/{max_retries}): {e}")
227
+ if attempt < max_retries - 1:
228
+ wait_time = base_wait * (attempt + 1)
229
+ time.sleep(wait_time)
230
+
231
+ print(f"Falha crítica após {max_retries} tentativas no G4F.")
232
+ return "{}"
233
 
234
+ def load_transcript(project_folder):
235
+ """Parses input.tsv or input.srt from the project folder."""
236
  input_tsv = os.path.join(project_folder, 'input.tsv')
237
  input_srt = os.path.join(project_folder, 'input.srt')
238
 
 
239
  transcript_segments = []
240
 
241
  # Try to load TSV first (more reliable time)
 
262
  if not transcript_segments and os.path.exists(input_srt):
263
  with open(input_srt, 'r', encoding='utf-8') as f:
264
  srt_content = f.read()
 
 
265
  pattern = re.compile(r'(\d+)\n(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})\n((?:(?!\n\n).)*)', re.DOTALL)
266
  matches = pattern.findall(srt_content)
267
 
 
277
 
278
  if not transcript_segments:
279
  raise ValueError("Could not parse transcript from TSV or SRT.")
280
+
281
+ return transcript_segments
282
 
283
+ def process_segments(raw_segments, transcript_segments, min_duration, max_duration, output_count=None):
284
+ """
285
+ Aligns raw AI segments (with reference tags) to actual transcript timestamps.
286
+ Applies constraints, validation, and deduplication.
287
+ """
288
+
289
+ all_segments = raw_segments
290
+ tempo_minimo = min_duration
291
+ tempo_maximo = max_duration
292
+
293
+ # Sort segments by score (descending)
294
+ try:
295
+ all_segments.sort(key=lambda x: int(x.get('score', 0)), reverse=True)
296
+ except:
297
+ pass
298
+
299
+ # --- POST-PROCESSING: Match Text to Timestamps ---
300
+ processed_segments = []
301
+
302
+ print(f"[DEBUG] Matching {len(all_segments)} raw segments to timestamps...")
303
+
304
+ for seg in all_segments:
305
+ try:
306
+ # 1. Parse Reference Time
307
+ ref_time_str = seg.get('start_time_ref', '(0s)')
308
+ ref_time_val = 0
309
+ try:
310
+ if isinstance(ref_time_str, str):
311
+ match = re.search(r'\d+', ref_time_str)
312
+ if match:
313
+ ref_time_val = int(match.group())
314
+ else:
315
+ ref_time_val = int(ref_time_str)
316
+ except:
317
+ ref_time_val = 0
318
+
319
+ # Find segment index closest to ref_time
320
+ start_idx = 0
321
+ min_diff = 999999
322
+ for i, s in enumerate(transcript_segments):
323
+ diff = abs(s['start'] - ref_time_val)
324
+ if diff < min_diff:
325
+ min_diff = diff
326
+ start_idx = i
327
+ if s['start'] > ref_time_val + 10:
328
+ break
329
+
330
+ # Backtrack
331
+ start_idx = max(0, start_idx - 5)
332
+
333
+ # 2. Find Exact Start Text
334
+ start_text_target = seg.get('start_text', '').lower().strip()
335
+ # Normalize
336
+ start_text_target = re.sub(r'[^\w\s]', '', start_text_target)
337
+
338
+ final_start_time = -1
339
+ match_start_idx = -1
340
+
341
+ # Search window
342
+ search_limit = min(len(transcript_segments), start_idx + 50)
343
+
344
+ for i in range(start_idx, search_limit):
345
+ s_text = transcript_segments[i]['text'].lower()
346
+ s_text = re.sub(r'[^\w\s]', '', s_text)
347
+
348
+ # Check for partial match
349
+ if start_text_target and (start_text_target in s_text or s_text in start_text_target):
350
+ final_start_time = transcript_segments[i]['start']
351
+ match_start_idx = i
352
+ break
353
+
354
+ # Fallback
355
+ if final_start_time == -1:
356
+ final_start_time = transcript_segments[start_idx]['start'] if start_idx < len(transcript_segments) else ref_time_val
357
+ match_start_idx = start_idx
358
+
359
+ # 3. Find End Text
360
+ end_text_target = seg.get('end_text', '').lower().strip()
361
+ end_text_target = re.sub(r'[^\w\s]', '', end_text_target)
362
+
363
+ final_end_time = -1
364
+
365
+ if match_start_idx != -1:
366
+ search_end_limit = min(len(transcript_segments), match_start_idx + 200)
367
+
368
+ for i in range(match_start_idx, search_end_limit):
369
+ s_text = transcript_segments[i]['text'].lower()
370
+ s_text = re.sub(r'[^\w\s]', '', s_text)
371
+
372
+ if end_text_target and (end_text_target in s_text or s_text in end_text_target):
373
+ final_end_time = transcript_segments[i]['end']
374
+ break
375
+
376
+ # Fallback End Time
377
+ if final_end_time == -1:
378
+ final_end_time = final_start_time + tempo_minimo
379
+
380
+ # Calculate Duration
381
+ duration = final_end_time - final_start_time
382
+
383
+ # Validate Duration (Min)
384
+ if duration < tempo_minimo:
385
+ print(f"[WARN] Segmento menor que duration min ({duration:.2f}s < {tempo_minimo}s). Estendendo para {tempo_minimo}s.")
386
+ duration = tempo_minimo
387
+ final_end_time = final_start_time + duration
388
+
389
+ # Validate Duration (Max)
390
+ if duration > tempo_maximo:
391
+ print(f"[WARN] Segmento excede max duration ({duration:.2f}s > {tempo_maximo}s). Cortando para {tempo_maximo}s.")
392
+ final_end_time = final_start_time + tempo_maximo
393
+ duration = tempo_maximo
394
+
395
+ # Construct Final Segment
396
+ processed_segments.append({
397
+ "title": seg.get('title', 'Viral Segment'),
398
+ "start_time": final_start_time,
399
+ "end_time": final_end_time,
400
+ "hook": seg.get('title', ''),
401
+ "reasoning": seg.get('reasoning', ''),
402
+ "score": seg.get('score', 0),
403
+ "duration": duration
404
+ })
405
+
406
+ except Exception as e:
407
+ print(f"[WARN] Error processing segment {seg}: {e}")
408
+ continue
409
+
410
+ # Deduplication
411
+ unique_segments = []
412
+ processed_segments.sort(key=lambda x: int(x.get('score', 0)), reverse=True)
413
 
414
+ for candidate in processed_segments:
415
+ is_dup = False
416
+ for existing in unique_segments:
417
+ s1, e1 = candidate['start_time'], candidate['end_time']
418
+ # Simple float equality isn't safe, but max/min handles it
419
+ s2, e2 = existing['start_time'], existing['end_time']
420
+
421
+ overlap_start = max(s1, s2)
422
+ overlap_end = min(e1, e2)
423
+
424
+ if overlap_end > overlap_start:
425
+ intersection = overlap_end - overlap_start
426
+ if intersection > 5: # more than 5 seconds overlap
427
+ is_dup = True
428
+ print(f"[DEBUG] Dropping overlap: '{candidate.get('title')}' ({s1:.1f}-{e1:.1f}) overlaps with '{existing.get('title')}' ({s2:.1f}-{e2:.1f}) by {intersection:.1f}s")
429
+ break
430
+ if not is_dup:
431
+ unique_segments.append(candidate)
432
+
433
+ all_segments = unique_segments
434
+ print(f"[DEBUG] Finished processing. {len(all_segments)} segments valid.")
435
+
436
+ if output_count and len(all_segments) > output_count:
437
+ print(f"Filtrando os top {output_count} segmentos de {len(all_segments)} candidatos encontrados nos chunks.")
438
+ all_segments = all_segments[:output_count]
439
+
440
+ final_result = {"segments": all_segments}
441
+
442
+ # Validação básica de que temos start_time
443
+ validated_segments = []
444
+ for seg in final_result['segments']:
445
+ if 'start_time' in seg:
446
+ validated_segments.append(seg)
447
+
448
+ final_result['segments'] = validated_segments
449
+
450
+ return final_result
451
+
452
+
453
+ def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode="manual", api_key=None, project_folder="tmp", chunk_size_arg=None, model_name_arg=None):
454
+ quantidade_de_virals = num_segments
455
+
456
+ # 1. Load Transcript
457
+ transcript_segments = load_transcript(project_folder)
458
+
459
+ # 2. Pre-process Content
460
+ formatted_content = preprocess_transcript_for_ai(transcript_segments)
461
  content = formatted_content
462
 
463
  # Load Config and Prompt
 
465
  config_path = os.path.join(base_dir, 'api_config.json')
466
  prompt_path = os.path.join(base_dir, 'prompt.txt')
467
 
 
468
  config = {
469
  "selected_api": "gemini",
470
  "gemini": {
 
482
  try:
483
  with open(config_path, 'r', encoding='utf-8') as f:
484
  loaded_config = json.load(f)
 
485
  if "gemini" in loaded_config: config["gemini"].update(loaded_config["gemini"])
486
  if "g4f" in loaded_config: config["g4f"].update(loaded_config["g4f"])
487
  if "selected_api" in loaded_config: config["selected_api"] = loaded_config["selected_api"]
488
  except Exception as e:
489
+ print(f"Erro ao ler api_config.json: {e}")
490
 
491
+ # Config Vars
492
+ current_chunk_size = 15000
493
  model_name = ""
494
 
495
  if ai_mode == "gemini":
496
  cfg_chunk = config["gemini"].get("chunk_size", 15000)
497
  current_chunk_size = chunk_size_arg if chunk_size_arg and int(chunk_size_arg) > 0 else cfg_chunk
 
498
  cfg_model = config["gemini"].get("model", "gemini-2.5-flash-lite-preview-09-2025")
499
  model_name = model_name_arg if model_name_arg else cfg_model
500
+ if not api_key: api_key = config["gemini"].get("api_key", "")
 
 
501
 
502
  elif ai_mode == "g4f":
503
  cfg_chunk = config["g4f"].get("chunk_size", 2000)
504
  current_chunk_size = chunk_size_arg if chunk_size_arg and int(chunk_size_arg) > 0 else cfg_chunk
 
505
  cfg_model = config["g4f"].get("model", "gpt-4o-mini")
506
  model_name = model_name_arg if model_name_arg else cfg_model
507
 
508
  elif ai_mode == "local":
 
509
  current_chunk_size = chunk_size_arg if chunk_size_arg and int(chunk_size_arg) > 0 else 3000
 
510
  model_name = model_name_arg if model_name_arg else ""
511
 
512
  system_prompt_template = ""
 
514
  with open(prompt_path, 'r', encoding='utf-8') as f:
515
  system_prompt_template = f.read()
516
  else:
 
517
  print("Aviso: prompt.txt não encontrado. Usando prompt interno.")
518
  system_prompt_template = """You are a World-Class Viral Video Editor.
519
  {context_instruction}
520
  Analyze the transcript below with time tags (XXs). Find {amount} viral segments.
521
+ Constraints: Each segment MUST be between {min_duration} seconds and {max_duration} seconds.
522
  IMPORTANT: Output "Title", "Hook", and "Reasoning" in the SAME LANGUAGE as the transcript (e.g., if transcript is Portuguese, output Portuguese).
523
  TRANSCRIPT:
524
  {transcript_chunk}
 
541
  }
542
  '''
543
 
544
+ # Chunking
 
545
  chunk_size = int(current_chunk_size)
 
 
546
  overlap_size = max(1000, int(chunk_size * 0.1))
547
 
548
  chunks = []
 
553
 
554
  while start < content_len:
555
  end = min(start + chunk_size, content_len)
 
 
 
 
 
556
  if end < content_len:
557
  last_space = content.rfind(' ', start, end)
558
  if last_space != -1 and last_space > start:
559
  end = last_space
 
560
  chunk_text = content[start:end]
561
+ if chunk_text.strip():
562
  chunks.append(chunk_text)
 
563
  if end >= content_len:
564
  break
 
 
565
  next_start = max(start + 1, end - overlap_size)
 
 
566
  safe_space = content.rfind(' ', start, next_start)
567
  if safe_space != -1:
568
  start = safe_space + 1
 
580
  if len(chunks) > 1:
581
  context_instruction = f"Part {i+1} of {len(chunks)}. "
582
 
 
583
  try:
584
  prompt = system_prompt_template.format(
585
  context_instruction=context_instruction,
 
588
  max_duration=tempo_maximo,
589
  transcript_chunk=chunk,
590
  json_template=json_template,
591
+ amount=quantidade_de_virals
592
  )
593
  except KeyError as e:
 
 
 
594
  prompt = system_prompt_template
595
  prompt = prompt.replace("{context_instruction}", context_instruction)
596
  prompt = prompt.replace("{virality_instruction}", virality_instruction)
 
602
 
603
  output_texts.append(prompt)
604
 
 
605
  try:
606
  full_prompt_path = os.path.join(project_folder, "prompt_full.txt")
 
607
  full_prompt = system_prompt_template
608
  full_prompt = full_prompt.replace("{context_instruction}", "Full Video Transcript Analysis")
609
  full_prompt = full_prompt.replace("{virality_instruction}", virality_instruction)
610
  full_prompt = full_prompt.replace("{min_duration}", str(tempo_minimo))
611
  full_prompt = full_prompt.replace("{max_duration}", str(tempo_maximo))
612
+ full_prompt = full_prompt.replace("{transcript_chunk}", content)
613
  full_prompt = full_prompt.replace("{json_template}", json_template)
614
  full_prompt = full_prompt.replace("{amount}", str(quantidade_de_virals))
615
 
616
  with open(full_prompt_path, "w", encoding="utf-8") as f:
617
  f.write(full_prompt)
 
618
  except Exception as e:
619
  print(f"[WARN] Could not save prompt_full.txt: {e}")
 
620
 
621
+ all_raw_segments = []
622
 
623
  print(f"Processando {len(output_texts)} chunks usando modo: {ai_mode.upper()}")
624
 
 
625
  local_llm_instance = None
626
  if ai_mode == "local":
627
  if not HAS_LLAMA_CPP:
 
629
  return {"segments": []}
630
 
631
  models_dir = os.path.join(base_dir, 'models')
 
632
  model_path = os.path.join(models_dir, model_name)
633
  if not os.path.exists(model_path):
634
+ if os.path.exists(model_name):
635
  model_path = model_name
636
  else:
637
  print(f"Error: Model not found at {model_path}")
 
639
 
640
  print(f"[INFO] Loading Local Model: {os.path.basename(model_path)} (This may take a while)...")
641
  try:
 
642
  local_llm_instance = Llama(
643
  model_path=model_path,
644
  n_gpu_layers=-1,
 
651
 
652
  for i, prompt in enumerate(output_texts):
653
  response_text = ""
 
 
654
  manual_prompt_path = os.path.join(project_folder, f"prompt_part_{i+1}.txt")
655
  try:
656
  with open(manual_prompt_path, "w", encoding="utf-8") as f:
 
660
 
661
  if ai_mode == "manual":
662
  print(f"\n[INFO] O prompt foi salvo em: {manual_prompt_path}")
 
663
  print("\n" + "="*60)
664
  print(f"CHUNK {i+1}/{len(output_texts)}")
665
  print("="*60)
 
683
  print(f"Arquivo {response_json_path} não encontrado.")
684
  else:
685
  response_text = user_input
 
686
  if response_text.strip().startswith("{") and not response_text.strip().endswith("}"):
687
  print("Parece incompleto. Cole o resto e dê Enter (ou Ctrl+C para cancelar):")
688
  try:
689
+ rest = sys.stdin.read()
690
  response_text += rest
691
  except:
692
  pass
 
694
  elif ai_mode == "gemini":
695
  print(f"Enviando chunk {i+1} para o Gemini (Model: {model_name})...")
696
  response_text = call_gemini(prompt, api_key, model_name=model_name)
 
697
  elif ai_mode == "g4f":
698
  print(f"Enviando chunk {i+1} para o G4F (Model: {model_name})...")
699
  response_text = call_g4f(prompt, model_name=model_name)
 
700
  elif ai_mode == "local" and local_llm_instance:
701
  print(f"Processing chunk {i+1} with Local LLM...")
702
  try:
 
703
  output = local_llm_instance.create_chat_completion(
704
  messages=[
705
  {"role": "system", "content": "You are a helpful assistant that outputs only JSON."},
 
721
  print(f"[DEBUG] Raw response saved to: {raw_response_path}")
722
  except Exception as e:
723
  print(f"[WARN] Failed to save raw response: {e}")
 
724
 
725
  # Processar resposta
726
  try:
727
  data = clean_json_response(response_text)
728
  chunk_segments = data.get("segments", [])
729
  print(f"Encontrados {len(chunk_segments)} segmentos neste chunk.")
730
+ all_raw_segments.extend(chunk_segments)
731
  except json.JSONDecodeError:
732
+ print(f"Erro: Resposta inválida.")
 
733
  except Exception as e:
734
  print(f"Erro desconhecido ao processar chunk: {e}")
735
 
736
+ # Call the alignment / processing logic
737
+ return process_segments(
738
+ all_raw_segments,
739
+ transcript_segments,
740
+ tempo_minimo,
741
+ tempo_maximo,
742
+ output_count=quantidade_de_virals
743
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/download_video.py CHANGED
@@ -6,8 +6,18 @@ from i18n.i18n import I18nAuto
6
  i18n = I18nAuto()
7
 
8
  def sanitize_filename(name):
9
- """Remove caracteres inválidos para nomes de arquivos/pastas."""
 
10
  cleaned = re.sub(r'[\\/*?:"<>|]', "", name)
 
 
 
 
 
 
 
 
 
11
  cleaned = cleaned.strip()
12
  return cleaned
13
 
@@ -38,8 +48,11 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
38
  info = ydl.extract_info(url, download=False)
39
  title = info.get('title')
40
  except Exception as e:
41
- print(i18n("Warning: Failed to extract info with cookies: {}").format(e))
42
-
 
 
 
43
  # Tentativa 2: Sem cookies
44
  if not title:
45
  try:
@@ -47,12 +60,20 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
47
  info = ydl.extract_info(url, download=False)
48
  title = info.get('title')
49
  except Exception as e:
50
- print(i18n("Error getting video info (without cookies): {}").format(e))
 
 
 
51
 
52
  # Fallback final
53
  if title:
54
  safe_title = sanitize_filename(title)
55
- print(i18n("Detected title: {}").format(title))
 
 
 
 
 
56
  else:
57
  print(i18n("WARNING: Title could not be obtained. Using 'Unknown_Video'."))
58
  safe_title = i18n("Unknown_Video")
@@ -69,7 +90,10 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
69
  # Verificação inteligente
70
  if os.path.exists(final_video_path):
71
  if os.path.getsize(final_video_path) > 1024:
72
- print(i18n("Video already exists at: {}").format(final_video_path))
 
 
 
73
  print(i18n("Skipping download and reusing local file."))
74
  return final_video_path, project_folder
75
  else:
@@ -127,7 +151,10 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
127
  'format': 'srt',
128
  }]
129
 
130
- print(i18n("Downloading video to: {}...").format(project_folder))
 
 
 
131
 
132
  # Tentativa 1: Com configuração original
133
  try:
@@ -178,7 +205,10 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
178
  new_name = os.path.join(project_folder, "input.srt") # Vamos padronizar tudo para .srt
179
 
180
  if ext.lower() == '.vtt':
181
- print(i18n("Formatting complex VTT subtitle ({}) to clean SRT...").format(os.path.basename(best_sub)))
 
 
 
182
  try:
183
  with open(best_sub, 'r', encoding='utf-8') as f:
184
  lines = f.readlines()
@@ -252,7 +282,10 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
252
  with open(new_name, 'w', encoding='utf-8') as f_out:
253
  f_out.writelines(srt_content)
254
 
255
- print(i18n("Subtitle converted and cleaned: {}").format(new_name))
 
 
 
256
  try: os.remove(best_sub)
257
  except: pass
258
 
@@ -271,7 +304,10 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"):
271
  try: os.remove(new_name)
272
  except: pass
273
  os.rename(best_sub, new_name)
274
- print(i18n("SRT subtitle renamed to: {}").format(new_name))
 
 
 
275
 
276
  # Limpa sobras
277
  for extra in potential_subs[1:]:
 
6
  i18n = I18nAuto()
7
 
8
def sanitize_filename(name):
    """Remove filesystem-reserved characters and emojis from *name*.

    Keeps accented Latin characters (á, ç, é) via a CP1252 round-trip while
    dropping emojis and other symbols that Windows consoles (CP1252) cannot
    encode, preventing UnicodeEncodeError when the name is later printed or
    used as a file/folder name.

    Args:
        name: Raw title string (e.g. a downloaded video title).

    Returns:
        A cleaned, stripped string safe to use as a file/folder name.
        May be empty if *name* contained only invalid characters.
    """
    # Strip characters reserved by the filesystem (Windows-invalid set).
    cleaned = re.sub(r'[\\/*?:"<>|]', "", name)

    # Drop emojis / anything outside CP1252 (keeps Latin accents).
    try:
        cleaned = cleaned.encode('cp1252', 'ignore').decode('cp1252')
    except LookupError:
        # encode() with errors='ignore' can only fail if the codec itself is
        # missing (LookupError) — e.g. a stripped-down build. Fall back to
        # plain ASCII, which also removes accents.
        cleaned = cleaned.encode('ascii', 'ignore').decode('ascii')

    return cleaned.strip()
23
 
 
48
  info = ydl.extract_info(url, download=False)
49
  title = info.get('title')
50
  except Exception as e:
51
+ try:
52
+ print(i18n("Warning: Failed to extract info with cookies: {}").format(e))
53
+ except UnicodeEncodeError:
54
+ print(i18n("Warning: Failed to extract info with cookies: [Encoding Error in Message]"))
55
+
56
  # Tentativa 2: Sem cookies
57
  if not title:
58
  try:
 
60
  info = ydl.extract_info(url, download=False)
61
  title = info.get('title')
62
  except Exception as e:
63
+ try:
64
+ print(i18n("Error getting video info (without cookies): {}").format(e))
65
+ except UnicodeEncodeError:
66
+ print(i18n("Error getting video info (without cookies): [Encoding Error in Message]"))
67
 
68
  # Fallback final
69
  if title:
70
  safe_title = sanitize_filename(title)
71
+ try:
72
+ print(i18n("Detected title: {}").format(title))
73
+ except UnicodeEncodeError:
74
+ # Fallback for Windows consoles that choke on Emojis
75
+ clean_title = title.encode('ascii', 'replace').decode('ascii')
76
+ print(i18n("Detected title: {}").format(clean_title))
77
  else:
78
  print(i18n("WARNING: Title could not be obtained. Using 'Unknown_Video'."))
79
  safe_title = i18n("Unknown_Video")
 
90
  # Verificação inteligente
91
  if os.path.exists(final_video_path):
92
  if os.path.getsize(final_video_path) > 1024:
93
+ try:
94
+ print(i18n("Video already exists at: {}").format(final_video_path))
95
+ except UnicodeEncodeError:
96
+ print(i18n("Video already exists at: {}").format(final_video_path.encode('ascii', 'replace').decode('ascii')))
97
  print(i18n("Skipping download and reusing local file."))
98
  return final_video_path, project_folder
99
  else:
 
151
  'format': 'srt',
152
  }]
153
 
154
+ try:
155
+ print(i18n("Downloading video to: {}...").format(project_folder))
156
+ except UnicodeEncodeError:
157
+ print(i18n("Downloading video to: {}...").format(project_folder.encode('ascii', 'replace').decode('ascii')))
158
 
159
  # Tentativa 1: Com configuração original
160
  try:
 
205
  new_name = os.path.join(project_folder, "input.srt") # Vamos padronizar tudo para .srt
206
 
207
  if ext.lower() == '.vtt':
208
+ try:
209
+ print(i18n("Formatting complex VTT subtitle ({}) to clean SRT...").format(os.path.basename(best_sub)))
210
+ except UnicodeEncodeError:
211
+ print(i18n("Formatting complex VTT subtitle ({}) to clean SRT...").format(os.path.basename(best_sub).encode('ascii', 'replace').decode('ascii')))
212
  try:
213
  with open(best_sub, 'r', encoding='utf-8') as f:
214
  lines = f.readlines()
 
282
  with open(new_name, 'w', encoding='utf-8') as f_out:
283
  f_out.writelines(srt_content)
284
 
285
+ try:
286
+ print(i18n("Subtitle converted and cleaned: {}").format(new_name))
287
+ except UnicodeEncodeError:
288
+ print(i18n("Subtitle converted and cleaned: {}").format(new_name.encode('ascii', 'replace').decode('ascii')))
289
  try: os.remove(best_sub)
290
  except: pass
291
 
 
304
  try: os.remove(new_name)
305
  except: pass
306
  os.rename(best_sub, new_name)
307
+ try:
308
+ print(i18n("SRT subtitle renamed to: {}").format(new_name))
309
+ except UnicodeEncodeError:
310
+ print(i18n("SRT subtitle renamed to: {}").format(new_name.encode('ascii', 'replace').decode('ascii')))
311
 
312
  # Limpa sobras
313
  for extra in potential_subs[1:]:
scripts/edit_video.py CHANGED
@@ -3,7 +3,7 @@ import numpy as np
3
  import os
4
  import subprocess
5
  import mediapipe as mp
6
- from scripts.one_face import crop_and_resize_single_face, resize_with_padding, detect_face_or_body
7
  from scripts.two_face import crop_and_resize_two_faces, detect_face_or_body_two_faces
8
  try:
9
  from scripts.face_detection_insightface import init_insightface, detect_faces_insightface, crop_and_resize_insightface
@@ -12,6 +12,48 @@ except ImportError:
12
  INSIGHTFACE_AVAILABLE = False
13
  print("InsightFace not found or error importing. Install with: pip install insightface onnxruntime-gpu")
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
def get_center_bbox(bbox):
    """Return the (x, y) center point of an [x1, y1, x2, y2] bounding box."""
    x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
    return ((x1 + x2) / 2, (y1 + y2) / 2)
@@ -52,9 +94,9 @@ def sort_by_proximity(new_faces, old_faces, center_func):
52
 
53
  return new_faces
54
 
55
- def generate_short_fallback(input_file, output_file, index, project_folder, final_folder):
56
- """Fallback function: Center Crop if MediaPipe fails."""
57
- print(f"Processing (Center Crop Fallback): {input_file}")
58
  cap = cv2.VideoCapture(input_file)
59
  if not cap.isOpened():
60
  print(f"Error opening video: {input_file}")
@@ -65,9 +107,12 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
65
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
66
 
67
  # Target dimensions (9:16)
 
68
  target_width = 1080
69
  target_height = 1920
70
 
 
 
71
  # Use FFmpeg Pipe instead of cv2.VideoWriter to avoid OpenCV backend errors
72
  ffmpeg_cmd = [
73
  'ffmpeg', '-y', '-loglevel', 'error', '-hide_banner', '-stats',
@@ -77,12 +122,16 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
77
  '-pix_fmt', 'bgr24',
78
  '-r', str(fps),
79
  '-i', '-',
80
- '-c:v', 'libx264', # or h264_nvenc if available
81
- '-preset', 'fast',
82
  '-pix_fmt', 'yuv420p',
83
  output_file
84
  ]
85
 
 
 
 
 
86
  process = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)
87
 
88
  while True:
@@ -90,38 +139,19 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina
90
  if not ret:
91
  break
92
 
93
- # Resize mantendo aspect ratio para cobrir altura 1920
94
- scale_factor = target_height / height
95
- # Se após o resize a largura for menor que 1080, escala pela largura
96
- if width * scale_factor < target_width:
97
- scale_factor = target_width / width
98
-
99
- # Garante dimensoes inteiras
100
- new_w = int(width * scale_factor)
101
- new_h = int(height * scale_factor)
102
-
103
- resized = cv2.resize(frame, (new_w, new_h))
104
-
105
- # Crop center
106
- res_h, res_w, _ = resized.shape
107
- start_x = (res_w - target_width) // 2
108
- start_y = (res_h - target_height) // 2
109
-
110
- if start_x < 0: start_x = 0
111
- if start_y < 0: start_y = 0
112
-
113
- cropped = resized[start_y:start_y+target_height, start_x:start_x+target_width]
114
-
115
- # Resize final por segurança e validação
116
- if cropped.shape[1] != target_width or cropped.shape[0] != target_height:
117
- cropped = cv2.resize(cropped, (target_width, target_height))
118
 
119
  try:
120
  # Write raw bytes to ffmpeg stdin
121
- process.stdin.write(cropped.tobytes())
122
  except Exception as e:
123
  print(f"Error writing frame to ffmpeg pipe: {e}")
124
  pass
 
 
125
 
126
  cap.release()
127
  process.stdin.close()
@@ -137,11 +167,12 @@ def finalize_video(input_file, output_file, index, fps, project_folder, final_fo
137
 
138
  if os.path.exists(audio_file) and os.path.getsize(audio_file) > 0:
139
  final_output = os.path.join(final_folder, f"final-output{str(index).zfill(3)}_processed.mp4")
 
140
  command = [
141
  "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats",
142
  "-i", output_file,
143
  "-i", audio_file,
144
- "-c:v", "h264_nvenc", "-preset", "fast", "-b:v", "5M",
145
  "-c:a", "aac", "-b:a", "192k",
146
  "-r", str(fps),
147
  final_output
@@ -191,7 +222,7 @@ def calculate_mouth_ratio(landmarks):
191
 
192
  return h / w
193
 
194
- def generate_short_mediapipe(input_file, output_file, index, face_mode, project_folder, final_folder, face_detection, face_mesh, pose, detection_period=None):
195
  try:
196
  cap = cv2.VideoCapture(input_file)
197
  if not cap.isOpened():
@@ -218,8 +249,8 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
218
 
219
  last_detected_faces = None
220
  last_frame_face_positions = None
221
- frames_since_last_detection = 0
222
- max_frames_without_detection = int(5 * fps) # Fallback timeout
223
 
224
  transition_duration = int(fps)
225
  transition_frames = []
@@ -275,9 +306,9 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
275
  else:
276
  transition_frames = []
277
  last_detected_faces = current_detections
278
- frames_since_last_detection = 0
279
  else:
280
- frames_since_last_detection += 1
281
 
282
  # Update next detection frame
283
  step = 5
@@ -300,10 +331,13 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
300
  if len(transition_frames) > 0:
301
  current_faces = transition_frames[0]
302
  transition_frames = transition_frames[1:]
303
- elif last_detected_faces is not None and frames_since_last_detection <= max_frames_without_detection:
304
  current_faces = last_detected_faces
305
  else:
306
- result = resize_with_padding(frame)
 
 
 
307
  coordinate_log.append({"frame": frame_index, "faces": []})
308
  out.write(result)
309
  continue
@@ -319,7 +353,10 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
319
  f = current_faces[0]
320
  result = crop_and_resize_single_face(frame, f)
321
  else:
322
- result = resize_with_padding(frame)
 
 
 
323
 
324
  out.write(result)
325
 
@@ -332,7 +369,7 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_
332
  print(f"Error in MediaPipe processing: {e}")
333
  raise e # Rethrow to trigger fallback
334
 
335
- def generate_short_haar(input_file, output_file, index, project_folder, final_folder, detection_period=None):
336
  """Face detection using OpenCV Haar Cascades."""
337
  print(f"Processing (Haar Cascade): {input_file}")
338
 
@@ -361,8 +398,8 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
361
  detection_interval = max(1, int(detection_period * fps))
362
  last_detected_faces = None
363
  last_frame_face_positions = None
364
- frames_since_last_detection = 0
365
- max_frames_without_detection = int(5 * fps)
366
 
367
  transition_duration = int(fps) # 1 second smooth transition
368
  transition_frames = []
@@ -399,18 +436,21 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
399
  else:
400
  transition_frames = []
401
  last_detected_faces = detections
402
- frames_since_last_detection = 0
403
  else:
404
- frames_since_last_detection += 1
405
 
406
  if len(transition_frames) > 0:
407
  current_faces = transition_frames[0]
408
  transition_frames = transition_frames[1:]
409
- elif last_detected_faces is not None and frames_since_last_detection <= max_frames_without_detection:
410
  current_faces = last_detected_faces
411
  else:
412
  # No face detected for a while -> Center/Padding fallback
413
- result = resize_with_padding(frame)
 
 
 
414
  out.write(result)
415
  continue
416
 
@@ -434,7 +474,7 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo
434
 
435
  finalize_video(input_file, output_file, index, fps, project_folder, final_folder)
436
 
437
- def generate_short_insightface(input_file, output_file, index, project_folder, final_folder, face_mode="auto", detection_period=None, filter_threshold=0.35, two_face_threshold=0.60, confidence_threshold=0.30, dead_zone=40, focus_active_speaker=False, active_speaker_mar=0.03, active_speaker_score_diff=1.5, include_motion=False, active_speaker_motion_deadzone=3.0, active_speaker_motion_sensitivity=0.05, active_speaker_decay=2.0):
438
  """Face detection using InsightFace (SOTA)."""
439
  print(f"Processing (InsightFace): {input_file} | Mode: {face_mode}")
440
 
@@ -457,8 +497,8 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
457
 
458
  last_detected_faces = None
459
  last_frame_face_positions = None
460
- frames_since_last_detection = 0
461
- max_frames_without_detection = 90 # 3 seconds timeout
462
 
463
  transition_duration = 4 # Smooth transition over 4 frames (almost continuous)
464
  transition_frames = []
@@ -872,9 +912,10 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
872
  # Reset transition if face count changed or first detect
873
  transition_frames = []
874
  last_detected_faces = detections
875
- frames_since_last_detection = 0
876
  else:
877
- frames_since_last_detection += 1
 
878
 
879
  # Update next detection frame based on NEW state
880
  step = 5 # Default fallback (very fast)
@@ -899,11 +940,14 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
899
  if len(transition_frames) > 0:
900
  current_faces = transition_frames[0]
901
  transition_frames = transition_frames[1:]
902
- elif last_detected_faces is not None and frames_since_last_detection <= max_frames_without_detection:
903
  current_faces = last_detected_faces
904
  else:
905
  # Fallback for this frame
906
- result = resize_with_padding(frame)
 
 
 
907
  out.write(result)
908
  continue
909
 
@@ -1017,7 +1061,7 @@ def generate_short_insightface(input_file, output_file, index, project_folder, f
1017
  return "1"
1018
 
1019
 
1020
- def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detection_period=None, filter_threshold=0.35, two_face_threshold=0.60, confidence_threshold=0.30, dead_zone=40, focus_active_speaker=False, active_speaker_mar=0.03, active_speaker_score_diff=1.5, include_motion=False, active_speaker_motion_deadzone=3.0, active_speaker_motion_sensitivity=0.05, active_speaker_decay=2.0, segments_data=None):
1021
  # Lazy init solutions only when needed to avoid AttributeError if import failed partially
1022
  mp_face_detection = None
1023
  mp_face_mesh = None
@@ -1118,7 +1162,8 @@ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detec
1118
  active_speaker_mar=active_speaker_mar, active_speaker_score_diff=active_speaker_score_diff, include_motion=include_motion,
1119
  active_speaker_motion_deadzone=active_speaker_motion_deadzone,
1120
  active_speaker_motion_sensitivity=active_speaker_motion_sensitivity,
1121
- active_speaker_decay=active_speaker_decay)
 
1122
  if res: detected_mode = res
1123
  success = True
1124
  except Exception as e:
@@ -1134,7 +1179,7 @@ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detec
1134
  mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=2, refine_landmarks=True, min_detection_confidence=0.2, min_tracking_confidence=0.2) as face_mesh, \
1135
  mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
1136
 
1137
- generate_short_mediapipe(input_file, output_file, index, face_mode, project_folder, final_folder, face_detection, face_mesh, pose, detection_period=detection_period)
1138
  # We don't easily know detected mode here without return, assuming '1' or '2' based on last frame?
1139
  # Ideally function should return as well.
1140
  detected_mode = "1" # Placeholder, user didn't complain about stats.
@@ -1149,14 +1194,14 @@ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detec
1149
  if not success and (use_haar or (not mediapipe_working and not insightface_working)):
1150
  try:
1151
  print("Attempts with Haar Cascade...")
1152
- generate_short_haar(input_file, output_file, index, project_folder, final_folder, detection_period=detection_period)
1153
  success = True
1154
  except Exception as e2:
1155
  print(f"Haar fallback also failed: {e2}")
1156
 
1157
  # 4. Last Resort: Center Crop
1158
  if not success:
1159
- generate_short_fallback(input_file, output_file, index, project_folder, final_folder)
1160
  detected_mode = "1"
1161
  success = True
1162
 
 
3
  import os
4
  import subprocess
5
  import mediapipe as mp
6
+ from scripts.one_face import crop_and_resize_single_face, resize_with_padding, detect_face_or_body, crop_center_zoom
7
  from scripts.two_face import crop_and_resize_two_faces, detect_face_or_body_two_faces
8
  try:
9
  from scripts.face_detection_insightface import init_insightface, detect_faces_insightface, crop_and_resize_insightface
 
12
  INSIGHTFACE_AVAILABLE = False
13
  print("InsightFace not found or error importing. Install with: pip install insightface onnxruntime-gpu")
14
 
15
+
16
+ # Global cache for encoder
17
+ CACHED_ENCODER = None
18
+
19
+ def get_best_encoder():
20
+ global CACHED_ENCODER
21
+ if CACHED_ENCODER: return CACHED_ENCODER
22
+
23
+ try:
24
+ # Check available encoders
25
+ result = subprocess.run(['ffmpeg', '-hide_banner', '-encoders'], capture_output=True, text=True)
26
+ output = result.stdout
27
+
28
+ # Priority: NVENC (NVIDIA) > AMF (AMD) > QSV (Intel) > CPU
29
+ if "h264_nvenc" in output:
30
+ print("Encoder Detected: NVIDIA (h264_nvenc)")
31
+ CACHED_ENCODER = ("h264_nvenc", "fast") # p1-p7 presets could be used but 'fast' maps well
32
+ return CACHED_ENCODER
33
+
34
+ if "h264_amf" in output:
35
+ print("Encoder Detected: AMD (h264_amf)")
36
+ CACHED_ENCODER = ("h264_amf", "speed") # quality, speed, balanced
37
+ return CACHED_ENCODER
38
+
39
+ if "h264_qsv" in output:
40
+ print("Encoder Detected: Intel QSV (h264_qsv)")
41
+ CACHED_ENCODER = ("h264_qsv", "veryfast")
42
+ return CACHED_ENCODER
43
+
44
+ # Mac OS (VideoToolbox)
45
+ if "h264_videotoolbox" in output:
46
+ print("Encoder Detected: MacOS (h264_videotoolbox)")
47
+ CACHED_ENCODER = ("h264_videotoolbox", "default")
48
+ return CACHED_ENCODER
49
+
50
+ except Exception as e:
51
+ print(f"Error checking encoders: {e}")
52
+
53
+ print("Encoder Detected: CPU (libx264)")
54
+ CACHED_ENCODER = ("libx264", "ultrafast")
55
+ return CACHED_ENCODER
56
+
57
def get_center_bbox(bbox):
    """Return the (cx, cy) midpoint of a [x1, y1, x2, y2] bounding box."""
    center_x = (bbox[0] + bbox[2]) / 2
    center_y = (bbox[1] + bbox[3]) / 2
    return (center_x, center_y)
 
94
 
95
  return new_faces
96
 
97
+ def generate_short_fallback(input_file, output_file, index, project_folder, final_folder, no_face_mode="padding"):
98
+ """Fallback function: Center Crop (Zoom) or Padding if detection fails."""
99
+ print(f"Processing (Fallback): {input_file} | Mode: {no_face_mode}")
100
  cap = cv2.VideoCapture(input_file)
101
  if not cap.isOpened():
102
  print(f"Error opening video: {input_file}")
 
107
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
108
 
109
  # Target dimensions (9:16)
110
+
111
  target_width = 1080
112
  target_height = 1920
113
 
114
+ encoder_name, encoder_preset = get_best_encoder()
115
+
116
  # Use FFmpeg Pipe instead of cv2.VideoWriter to avoid OpenCV backend errors
117
  ffmpeg_cmd = [
118
  'ffmpeg', '-y', '-loglevel', 'error', '-hide_banner', '-stats',
 
122
  '-pix_fmt', 'bgr24',
123
  '-r', str(fps),
124
  '-i', '-',
125
+ '-c:v', encoder_name,
126
+ '-preset', encoder_preset,
127
  '-pix_fmt', 'yuv420p',
128
  output_file
129
  ]
130
 
131
+ # If using hardware encoder, we might want to set bitrate to ensure quality
132
+ if "nvenc" in encoder_name or "amf" in encoder_name:
133
+ ffmpeg_cmd.extend(["-b:v", "5M"])
134
+
135
  process = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)
136
 
137
  while True:
 
139
  if not ret:
140
  break
141
 
142
+ if no_face_mode == "zoom":
143
+ result = crop_center_zoom(frame)
144
+ else:
145
+ result = resize_with_padding(frame)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  try:
148
  # Write raw bytes to ffmpeg stdin
149
+ process.stdin.write(result.tobytes())
150
  except Exception as e:
151
  print(f"Error writing frame to ffmpeg pipe: {e}")
152
  pass
153
+
154
+
155
 
156
  cap.release()
157
  process.stdin.close()
 
167
 
168
  if os.path.exists(audio_file) and os.path.getsize(audio_file) > 0:
169
  final_output = os.path.join(final_folder, f"final-output{str(index).zfill(3)}_processed.mp4")
170
+ encoder_name, encoder_preset = get_best_encoder()
171
  command = [
172
  "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats",
173
  "-i", output_file,
174
  "-i", audio_file,
175
+ "-c:v", encoder_name, "-preset", encoder_preset, "-b:v", "5M",
176
  "-c:a", "aac", "-b:a", "192k",
177
  "-r", str(fps),
178
  final_output
 
222
 
223
  return h / w
224
 
225
+ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_folder, final_folder, face_detection, face_mesh, pose, detection_period=None, no_face_mode="padding"):
226
  try:
227
  cap = cv2.VideoCapture(input_file)
228
  if not cap.isOpened():
 
249
 
250
  last_detected_faces = None
251
  last_frame_face_positions = None
252
+ last_success_frame = -1000
253
+ max_frames_without_detection = int(3.0 * fps) # 3 seconds timeout
254
 
255
  transition_duration = int(fps)
256
  transition_frames = []
 
306
  else:
307
  transition_frames = []
308
  last_detected_faces = current_detections
309
+ last_success_frame = frame_index
310
  else:
311
+ pass
312
 
313
  # Update next detection frame
314
  step = 5
 
331
  if len(transition_frames) > 0:
332
  current_faces = transition_frames[0]
333
  transition_frames = transition_frames[1:]
334
+ elif last_detected_faces is not None and (frame_index - last_success_frame) <= max_frames_without_detection:
335
  current_faces = last_detected_faces
336
  else:
337
+ if no_face_mode == "zoom":
338
+ result = crop_center_zoom(frame)
339
+ else:
340
+ result = resize_with_padding(frame)
341
  coordinate_log.append({"frame": frame_index, "faces": []})
342
  out.write(result)
343
  continue
 
353
  f = current_faces[0]
354
  result = crop_and_resize_single_face(frame, f)
355
  else:
356
+ if no_face_mode == "zoom":
357
+ result = crop_center_zoom(frame)
358
+ else:
359
+ result = resize_with_padding(frame)
360
 
361
  out.write(result)
362
 
 
369
  print(f"Error in MediaPipe processing: {e}")
370
  raise e # Rethrow to trigger fallback
371
 
372
+ def generate_short_haar(input_file, output_file, index, project_folder, final_folder, detection_period=None, no_face_mode="padding"):
373
  """Face detection using OpenCV Haar Cascades."""
374
  print(f"Processing (Haar Cascade): {input_file}")
375
 
 
398
  detection_interval = max(1, int(detection_period * fps))
399
  last_detected_faces = None
400
  last_frame_face_positions = None
401
+ last_success_frame = -1000
402
+ max_frames_without_detection = int(3.0 * fps)
403
 
404
  transition_duration = int(fps) # 1 second smooth transition
405
  transition_frames = []
 
436
  else:
437
  transition_frames = []
438
  last_detected_faces = detections
439
+ last_success_frame = frame_index
440
  else:
441
+ pass
442
 
443
  if len(transition_frames) > 0:
444
  current_faces = transition_frames[0]
445
  transition_frames = transition_frames[1:]
446
+ elif last_detected_faces is not None and (frame_index - last_success_frame) <= max_frames_without_detection:
447
  current_faces = last_detected_faces
448
  else:
449
  # No face detected for a while -> Center/Padding fallback
450
+ if no_face_mode == "zoom":
451
+ result = crop_center_zoom(frame)
452
+ else:
453
+ result = resize_with_padding(frame)
454
  out.write(result)
455
  continue
456
 
 
474
 
475
  finalize_video(input_file, output_file, index, fps, project_folder, final_folder)
476
 
477
+ def generate_short_insightface(input_file, output_file, index, project_folder, final_folder, face_mode="auto", detection_period=None, filter_threshold=0.35, two_face_threshold=0.60, confidence_threshold=0.30, dead_zone=40, focus_active_speaker=False, active_speaker_mar=0.03, active_speaker_score_diff=1.5, include_motion=False, active_speaker_motion_deadzone=3.0, active_speaker_motion_sensitivity=0.05, active_speaker_decay=2.0, no_face_mode="padding"):
478
  """Face detection using InsightFace (SOTA)."""
479
  print(f"Processing (InsightFace): {input_file} | Mode: {face_mode}")
480
 
 
497
 
498
  last_detected_faces = None
499
  last_frame_face_positions = None
500
+ last_success_frame = -1000
501
+ max_frames_without_detection = int(3.0 * fps) # 3 seconds timeout
502
 
503
  transition_duration = 4 # Smooth transition over 4 frames (almost continuous)
504
  transition_frames = []
 
912
  # Reset transition if face count changed or first detect
913
  transition_frames = []
914
  last_detected_faces = detections
915
+ last_success_frame = frame_index
916
  else:
917
+ pass
918
+
919
 
920
  # Update next detection frame based on NEW state
921
  step = 5 # Default fallback (very fast)
 
940
  if len(transition_frames) > 0:
941
  current_faces = transition_frames[0]
942
  transition_frames = transition_frames[1:]
943
+ elif last_detected_faces is not None and (frame_index - last_success_frame) <= max_frames_without_detection:
944
  current_faces = last_detected_faces
945
  else:
946
  # Fallback for this frame
947
+ if no_face_mode == "zoom":
948
+ result = crop_center_zoom(frame)
949
+ else:
950
+ result = resize_with_padding(frame)
951
  out.write(result)
952
  continue
953
 
 
1061
  return "1"
1062
 
1063
 
1064
+ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detection_period=None, filter_threshold=0.35, two_face_threshold=0.60, confidence_threshold=0.30, dead_zone=40, focus_active_speaker=False, active_speaker_mar=0.03, active_speaker_score_diff=1.5, include_motion=False, active_speaker_motion_deadzone=3.0, active_speaker_motion_sensitivity=0.05, active_speaker_decay=2.0, segments_data=None, no_face_mode="padding"):
1065
  # Lazy init solutions only when needed to avoid AttributeError if import failed partially
1066
  mp_face_detection = None
1067
  mp_face_mesh = None
 
1162
  active_speaker_mar=active_speaker_mar, active_speaker_score_diff=active_speaker_score_diff, include_motion=include_motion,
1163
  active_speaker_motion_deadzone=active_speaker_motion_deadzone,
1164
  active_speaker_motion_sensitivity=active_speaker_motion_sensitivity,
1165
+ active_speaker_decay=active_speaker_decay,
1166
+ no_face_mode=no_face_mode)
1167
  if res: detected_mode = res
1168
  success = True
1169
  except Exception as e:
 
1179
  mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=2, refine_landmarks=True, min_detection_confidence=0.2, min_tracking_confidence=0.2) as face_mesh, \
1180
  mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
1181
 
1182
+ generate_short_mediapipe(input_file, output_file, index, face_mode, project_folder, final_folder, face_detection, face_mesh, pose, detection_period=detection_period, no_face_mode=no_face_mode)
1183
  # We don't easily know detected mode here without return, assuming '1' or '2' based on last frame?
1184
  # Ideally function should return as well.
1185
  detected_mode = "1" # Placeholder, user didn't complain about stats.
 
1194
  if not success and (use_haar or (not mediapipe_working and not insightface_working)):
1195
  try:
1196
  print("Attempts with Haar Cascade...")
1197
+ generate_short_haar(input_file, output_file, index, project_folder, final_folder, detection_period=detection_period, no_face_mode=no_face_mode)
1198
  success = True
1199
  except Exception as e2:
1200
  print(f"Haar fallback also failed: {e2}")
1201
 
1202
  # 4. Last Resort: Center Crop
1203
  if not success:
1204
+ generate_short_fallback(input_file, output_file, index, project_folder, final_folder, no_face_mode=no_face_mode)
1205
  detected_mode = "1"
1206
  success = True
1207
 
scripts/one_face.py CHANGED
@@ -107,3 +107,33 @@ def detect_face_or_body(frame, face_detection, face_mesh, pose):
107
  # Se nada for detectado, retornar uma lista vazia
108
  return detections if detections else None
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  # Se nada for detectado, retornar uma lista vazia
108
  return detections if detections else None
109
 
110
+
111
def crop_center_zoom(frame, target_width=1080, target_height=1920):
    """Center-crop *frame* to fill a vertical aspect ratio (zoom effect).

    The largest centered region matching target_width:target_height is cut
    out of the frame and scaled to the target resolution, so the output is
    fully covered with no padding bars.

    Args:
        frame: BGR image as an (H, W, C) array from OpenCV.
        target_width: Output width in pixels (default 1080).
        target_height: Output height in pixels (default 1920; the defaults
            give the 9:16 short-form video format).

    Returns:
        The cropped frame resized to (target_width, target_height).
    """
    frame_height, frame_width = frame.shape[:2]
    target_aspect_ratio = target_width / target_height  # 9/16 by default

    # Calculate crop dimensions so the region FILLS the target ratio.
    if frame_width / frame_height > target_aspect_ratio:
        # Source is wider than target (e.g. 16:9 source, 9:16 target) -> crop width
        crop_width = int(frame_height * target_aspect_ratio)
        crop_height = frame_height
    else:
        # Source is taller than target -> crop height
        crop_width = frame_width
        crop_height = int(frame_width / target_aspect_ratio)

    # Center the crop window; clamp to frame bounds.
    start_x = max(0, (frame_width - crop_width) // 2)
    start_y = max(0, (frame_height - crop_height) // 2)

    crop_img = frame[start_y:start_y + crop_height, start_x:start_x + crop_width]

    # Resize to the final output resolution. INTER_AREA is the best choice
    # when the crop is being shrunk, the common case for HD+ sources.
    return cv2.resize(crop_img, (target_width, target_height), interpolation=cv2.INTER_AREA)
139
+