leicam committed on
Commit
603b064
·
verified ·
1 Parent(s): f248bc7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +340 -261
app.py CHANGED
@@ -2,15 +2,15 @@ import os
2
  import re
3
  import xml.etree.ElementTree as ET
4
  from dataclasses import dataclass
5
- from typing import List
6
  import gradio as gr
7
 
8
- # Optional LLM (Gemini)
9
  USE_LLM_DEFAULT = True
10
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
11
 
12
  LLM_AVAILABLE = False
13
- LLM_MODEL_NAME = "gemini-2.5-flash"
14
  try:
15
  if GEMINI_API_KEY:
16
  import google.generativeai as genai
@@ -37,7 +37,7 @@ class Segment:
37
  text: str
38
  score: float
39
 
40
- # ---- Timecode helpers ----
41
  def parse_timecode_to_frames(tc: str, fps: int = FPS) -> int:
42
  m = re.match(r"^\s*(\d{2}):(\d{2}):(\d{2})[:;](\d{2})\s*$", tc)
43
  if not m:
@@ -54,12 +54,20 @@ def frames_to_timecode(frames: int, fps: int = FPS) -> str:
54
  ff = rem % fps
55
  return f"{hh:02d}:{mm:02d}:{ss:02d}:{ff:02d}"
56
 
57
- # ---- Transcript parsing & scoring ----
 
 
 
 
 
 
58
  def parse_transcript(txt: str) -> List[Segment]:
59
  lines = [l.strip() for l in txt.splitlines() if l.strip()]
60
  results: List[Segment] = []
 
61
  pat_range = re.compile(r"^\[?\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*[-—]\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*\]?\s+(.*)$")
62
  pat_point = re.compile(r"^(\d{2}:\d{2}:\d{2}[:;]\d{2})\s+(.*)$")
 
63
  for l in lines:
64
  m = pat_range.match(l)
65
  if m:
@@ -72,6 +80,7 @@ def parse_transcript(txt: str) -> List[Segment]:
72
  except Exception:
73
  continue
74
  continue
 
75
  m = pat_point.match(l)
76
  if m:
77
  s, text = m.groups()
@@ -82,32 +91,11 @@ def parse_transcript(txt: str) -> List[Segment]:
82
  results.append(Segment(s, e, s_f, e_f, text, 0.0))
83
  except Exception:
84
  continue
85
- return results
86
-
87
- def keyword_score(text: str, custom_keywords: str = "", weight_emotion: float = 2.0,
88
- weight_break: float = 1.5, weight_learn: float = 1.2, weight_viral: float = 1.0) -> float:
89
- t = text.lower()
90
- kw_emotion = ["medo", "coragem", "raiva", "chorei", "feliz", "triste", "emocion", "culpa", "vergonha", "orgulho"]
91
- kw_break = ["nunca", "de repente", "contraintuitivo", "ninguém te conta", "parei", "decidi", "quebrei", "virada"]
92
- kw_learn = ["aprendi", "descobri", "lição", "entendi", "percebi", "insight", "melhorou", "piorou"]
93
- kw_viral = ["segredo", "verdade", "por trás", "3 passos", "passo a passo", "como eu", "ninguém fala"]
94
-
95
- score = 0.0
96
- for kw in kw_emotion: score += weight_emotion if kw in t else 0.0
97
- for kw in kw_break: score += weight_break if kw in t else 0.0
98
- for kw in kw_learn: score += weight_learn if kw in t else 0.0
99
- for kw in kw_viral: score += weight_viral if kw in t else 0.0
100
-
101
- if custom_keywords.strip():
102
- custom_kw_list = [kw.strip().lower() for kw in custom_keywords.split(",") if kw.strip()]
103
- for kw in custom_kw_list:
104
- score += 3.0 if kw in t else 0.0
105
 
106
- score += 0.2 * text.count("!")
107
- score += 0.0005 * len(text)
108
- return score
109
 
110
- def parse_manual_timecodes(manual_input: str) -> List[tuple]:
 
111
  manual_ranges = []
112
  normalized = manual_input.replace(",", "\n")
113
  lines = [l.strip() for l in normalized.splitlines() if l.strip()]
@@ -122,90 +110,216 @@ def parse_manual_timecodes(manual_input: str) -> List[tuple]:
122
 
123
  return manual_ranges
124
 
125
- def llm_process_natural_instructions(transcript_txt: str, natural_instructions: str, num_segments: int) -> List[Segment]:
126
- if not LLM_AVAILABLE:
127
- raise ValueError("LLM não disponível. Configure GEMINI_API_KEY para usar instruções em linguagem natural.")
 
 
 
 
 
 
 
 
 
128
 
129
- segs = parse_transcript(transcript_txt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  if not segs:
131
- raise ValueError("Nenhum trecho válido encontrado na transcrição.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
- segments_text = "\n".join([
134
- f"{i}. [{s.start_tc} - {s.end_tc}] {s.text}"
135
- for i, s in enumerate(segs)
136
- ])
 
 
 
 
 
 
 
 
137
 
138
- prompt = f"""Você é um editor de vídeo profissional. Analise a transcrição abaixo e as instruções do usuário.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  INSTRUÇÕES DO USUÁRIO:
141
- {natural_instructions}
142
 
143
- TRANSCRIÇÃO COM TIMECODES:
144
- {segments_text}
 
 
 
 
145
 
146
  TAREFA:
147
- 1. Interprete as instruções do usuário
148
- 2. Selecione os {num_segments} trechos que melhor atendem às instruções
149
- 3. Se a instrução for para REMOVER algo, selecione os trechos que NÃO contêm aquilo
150
- 4. Se a instrução for para INCLUIR algo específico, selecione apenas os trechos que contêm aquilo
151
- 5. Priorize trechos com narrativa coerente e impactantes
152
 
153
- RESPONDA APENAS com os índices dos trechos selecionados, separados por vírgula (ex: 0,3,5,8,12).
154
- Não adicione explicações, apenas os números."""
155
 
156
  try:
157
- response = LLM.generate_content(prompt, generation_config={"temperature": 0.3})
158
  txt = (response.text or "").strip()
159
 
160
- idxs = [int(x) for x in re.findall(r"\d+", txt)]
161
- idxs = [i for i in idxs if 0 <= i < len(segs)]
162
 
163
- if not idxs:
164
- raise ValueError("LLM não retornou índices válidos")
 
 
 
 
 
165
 
166
- selected = [segs[i] for i in idxs[:num_segments]]
167
- selected.sort(key=lambda x: x.start_f)
 
 
168
 
169
- return selected
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  except Exception as e:
172
- raise ValueError(f"Erro ao processar instruções com LLM: {e}")
 
173
 
174
- def llm_rank_segments(candidates: List[Segment], num_segments: int, custom_instructions: str = "") -> List[Segment]:
175
- if not LLM_AVAILABLE:
176
- return candidates[:num_segments]
177
-
178
- sample = "\n".join([f"{i}. [{c.start_tc}-{c.end_tc}] {c.text[:300]}" for i, c in enumerate(candidates)])
179
-
180
- base_prompt = (
181
- f"Você é um editor profissional. Selecione exatamente {num_segments} trechos mais fortes "
182
- "pela emoção, quebra de expectativa e aprendizado, mantendo uma mini-narrativa coerente.\n\n"
183
- )
184
-
185
- if custom_instructions.strip():
186
- base_prompt += f"INSTRUÇÕES ADICIONAIS: {custom_instructions}\n\n"
187
 
188
- base_prompt += "Responda apenas com índices (0-based) separados por vírgula.\n\n" + sample
 
 
 
 
 
 
 
 
 
 
189
 
190
- try:
191
- r = LLM.generate_content(base_prompt, generation_config={"temperature": 0.2})
192
- txt = (r.text or "").strip()
193
- idxs = [int(x) for x in re.findall(r"\d+", txt)]
194
- idxs = [i for i in idxs if 0 <= i < len(candidates)]
195
- if len(idxs) >= num_segments:
196
- return [candidates[i] for i in idxs[:num_segments]]
197
- elif len(idxs) > 0:
198
- return [candidates[i] for i in idxs]
199
- except Exception as e:
200
- print(f"Erro no LLM: {e}")
201
 
202
- return candidates[:num_segments]
 
203
 
 
204
  def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
205
  custom_keywords: str, manual_timecodes: str, natural_instructions: str,
206
  weight_emotion: float, weight_break: float,
207
  weight_learn: float, weight_viral: float) -> List[Segment]:
208
 
 
209
  manual_ranges = parse_manual_timecodes(manual_timecodes)
210
  if manual_ranges:
211
  result_segs = []
@@ -215,46 +329,42 @@ def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
215
  end_f = parse_timecode_to_frames(end_tc)
216
  if end_f > start_f:
217
  result_segs.append(Segment(
218
- start_tc=start_tc,
219
- end_tc=end_tc,
220
- start_f=start_f,
221
- end_f=end_f,
222
- text=f"Corte manual {start_tc} - {end_tc}",
223
  score=100.0
224
  ))
225
  except Exception as e:
226
- print(f"Erro ao processar timecode manual {start_tc}-{end_tc}: {e}")
227
- continue
228
-
229
- if not result_segs:
230
- raise ValueError("Nenhum timecode manual válido encontrado.")
231
-
232
- return result_segs
233
-
234
- if natural_instructions.strip() and use_llm and LLM_AVAILABLE:
235
- return llm_process_natural_instructions(transcript_txt, natural_instructions, num_segments)
236
 
 
237
  segs = parse_transcript(transcript_txt)
238
  if not segs:
239
- raise ValueError("Nenhum trecho válido encontrado na transcrição.")
240
 
241
- for s in segs:
242
- s.score = keyword_score(s.text, custom_keywords, weight_emotion, weight_break, weight_learn, weight_viral)
243
 
244
- segs.sort(key=lambda x: x.score, reverse=True)
245
- top = segs[:min(20, len(segs))]
 
 
 
 
 
246
 
247
- if use_llm and LLM_AVAILABLE:
248
- ranked = llm_rank_segments(top, num_segments, "")
249
- return ranked
250
 
251
- return top[:num_segments]
 
252
 
253
- # ---- XML editing ----
254
  def get_sequence(root: ET.Element) -> ET.Element:
255
  seq = root.find(".//sequence")
256
  if seq is None:
257
- raise ValueError("Nenhuma <sequence> encontrada no XML.")
258
  return seq
259
 
260
  def ensure_rate_24fps(element: ET.Element):
@@ -265,10 +375,6 @@ def ensure_rate_24fps(element: ET.Element):
265
  if tb is None:
266
  tb = ET.SubElement(rate, "timebase")
267
  tb.text = str(FPS)
268
- ntsc = rate.find("ntsc")
269
- if ntsc is None:
270
- ntsc = ET.SubElement(rate, "ntsc")
271
- ntsc.text = "FALSE"
272
 
273
  def deep_copy(elem: ET.Element) -> ET.Element:
274
  new = ET.Element(elem.tag, attrib=elem.attrib)
@@ -278,22 +384,8 @@ def deep_copy(elem: ET.Element) -> ET.Element:
278
  new.append(deep_copy(child))
279
  return new
280
 
281
- def clear_clipitems(track_elem: ET.Element):
282
- for ci in list(track_elem.findall("./clipitem")):
283
- track_elem.remove(ci)
284
-
285
- def first_clipitem_ref(track_elem: ET.Element):
286
- return track_elem.find("./clipitem")
287
-
288
- def copy_file_ref(from_clip: ET.Element, to_clip: ET.Element):
289
- src_file = from_clip.find("./file")
290
- if src_file is not None:
291
- old = to_clip.find("./file")
292
- if old is not None:
293
- to_clip.remove(old)
294
- to_clip.append(deep_copy(src_file))
295
-
296
- def build_clipitem(template_ci: ET.Element, cid: str, start_f: int, end_f: int, in_f: int, out_f: int, linked_ids):
297
  ci = ET.Element("clipitem", {"id": cid})
298
  name = template_ci.find("name")
299
  ci_name = ET.SubElement(ci, "name")
@@ -307,204 +399,191 @@ def build_clipitem(template_ci: ET.Element, cid: str, start_f: int, end_f: int,
307
  t = ET.SubElement(ci, tag)
308
  t.text = str(val)
309
 
310
- copy_file_ref(template_ci, ci)
 
 
311
 
312
  for lid in linked_ids:
313
  link = ET.SubElement(ci, "link")
314
  linkclipref = ET.SubElement(link, "linkclipref")
315
  linkclipref.text = lid
316
- mediatype = ET.SubElement(link, "mediatype")
317
- mediatype.text = "video" if "-v" in lid else "audio"
318
  return ci
319
 
320
  def edit_sequence_with_segments(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
321
  root = tree.getroot()
322
  seq = get_sequence(root)
323
- ensure_rate_24fps(seq)
324
-
325
  video_track = seq.find("./media/video/track")
326
  audio_track = seq.find("./media/audio/track")
327
-
328
- if video_track is None or audio_track is None:
329
- raise ValueError("Estrutura de trilhas não encontrada.")
330
-
331
- v_tpl = first_clipitem_ref(video_track)
332
- a_tpl = first_clipitem_ref(audio_track)
333
- if v_tpl is None or a_tpl is None:
334
- raise ValueError("Não há clipitem de referência em V1 e/ou A1.")
335
-
336
- clear_clipitems(video_track)
337
- clear_clipitems(audio_track)
338
-
 
 
339
  cursor = 0
340
  for idx, s in enumerate(segs, start=1):
341
  dur = s.end_f - s.start_f
342
- start = cursor
343
- end = cursor + dur
344
-
345
- v_id = f"clipitem-v-cut{idx}"
346
- a_id = f"clipitem-a-cut{idx}"
347
-
348
  v_ci = build_clipitem(v_tpl, v_id, start, end, s.start_f, s.end_f, [a_id])
349
  a_ci = build_clipitem(a_tpl, a_id, start, end, s.start_f, s.end_f, [v_id])
350
-
351
  video_track.append(v_ci)
352
  audio_track.append(a_ci)
353
-
354
  cursor = end
355
-
356
  return tree
357
 
358
- # ---- Gradio app ----
359
- def process_xml_and_transcript(premiere_xml_file, transcript_txt_file, use_llm,
360
- num_segments, custom_keywords, manual_timecodes, natural_instructions,
361
  weight_emotion, weight_break, weight_learn, weight_viral):
362
- if premiere_xml_file is None:
363
- return "Envie o XML do Premiere.", None, f"LLM disponível: {LLM_AVAILABLE}"
364
 
365
  manual_ranges = parse_manual_timecodes(manual_timecodes)
366
- has_natural_instructions = natural_instructions.strip() != ""
367
 
 
368
  if manual_ranges:
369
  mode = "MANUAL"
370
  transcript = ""
371
- elif has_natural_instructions:
372
- mode = "INSTRUÇÕES NATURAIS (IA)"
373
- if transcript_txt_file is None:
374
- return "Para usar instruções em linguagem natural, envie a transcrição.", None, f"LLM disponível: {LLM_AVAILABLE}"
375
  if not LLM_AVAILABLE:
376
- return "LLM não disponível. Configure GEMINI_API_KEY para usar instruções naturais.", None, f"LLM disponível: {LLM_AVAILABLE}"
377
- with open(transcript_txt_file.name, "r", encoding="utf-8") as f:
378
  transcript = f.read()
379
  else:
380
  mode = "AUTOMÁTICO"
381
- if transcript_txt_file is None:
382
- return "Envie a transcrição em .txt ou forneça minutagens manuais.", None, f"LLM disponível: {LLM_AVAILABLE}"
383
- with open(transcript_txt_file.name, "r", encoding="utf-8") as f:
384
  transcript = f.read()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
- segs = select_segments(transcript, use_llm and LLM_AVAILABLE, num_segments,
387
- custom_keywords, manual_timecodes, natural_instructions,
388
- weight_emotion, weight_break, weight_learn, weight_viral)
389
-
390
- tree = ET.parse(premiere_xml_file.name)
391
- tree = edit_sequence_with_segments(tree, segs)
392
-
393
- base = os.path.splitext(os.path.basename(premiere_xml_file.name))[0]
394
- out_path = os.path.join(OUTPUT_DIR, f"{base}_EDITADO.xml")
395
- tree.write(out_path, encoding="utf-8", xml_declaration=True)
396
-
397
- resumo = f"✂️ {len(segs)} cortes aplicados - Modo: {mode} (24 fps):\n\n"
398
- for i, s in enumerate(segs, 1):
399
- dur_sec = (s.end_f - s.start_f) / FPS
400
- resumo += f"{i}. {s.start_tc} → {s.end_tc} ({dur_sec:.1f}s)\n"
401
- if not manual_ranges:
402
- resumo += f" Score: {s.score:.1f} | {s.text[:150]}\n"
403
- resumo += "\n"
404
-
405
- status = f"✓ Modo: {mode} | LLM disponível: {LLM_AVAILABLE} | LLM usado: {use_llm and LLM_AVAILABLE}"
406
- return resumo, out_path, status
407
-
408
  css = """
409
  :root {
410
- --neon: #39FF14;
411
- --txt: #1a1a1a;
412
- --muted: #4b5563;
413
- --line: #d1d5db;
414
  }
415
-
416
  .gradio-container {
417
- font-family: 'Manrope', system-ui, sans-serif !important;
418
- background: linear-gradient(135deg, rgba(57,255,20,0.03) 0%, #fff 100%);
419
  }
420
-
421
- .gradio-container h1, .gradio-container h2, .gradio-container h3, .gradio-container label {
422
- color: var(--txt) !important;
423
- font-weight: 700 !important;
424
  }
425
-
426
  .gradio-container button.primary {
427
- background: var(--neon) !important;
428
  color: #000 !important;
429
- font-weight: 800 !important;
430
- border-radius: 10px !important;
431
- }
432
-
433
- .gradio-container input, .gradio-container textarea {
434
- color: var(--txt) !important;
435
- border-radius: 12px !important;
436
- }
437
-
438
- .gradio-container input[type="checkbox"]:checked {
439
- background: var(--neon) !important;
440
  }
441
  """
442
 
 
443
  with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
444
- gr.HTML("""
445
- <link href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;700;800&display=swap" rel="stylesheet">
446
- <div style="text-align: center; padding: 24px 0;">
447
- <h1 style="color: #1a1a1a; font-weight: 800;">Agente de Edição XML · Premiere</h1>
448
- <p style="color: #4b5563;">Edite sua sequência do Premiere com controle total</p>
449
- </div>
450
- """)
451
 
452
  with gr.Row():
453
- with gr.Column():
454
- xml_in = gr.File(label="XML da sequência (FCP XML)", file_types=[".xml"])
455
- txt_in = gr.File(label="Transcrição (.txt)", file_types=[".txt"])
456
-
457
- with gr.Column():
458
- use_llm = gr.Checkbox(
459
- label="Usar Potência Criativa (IA)",
460
- value=USE_LLM_DEFAULT and LLM_AVAILABLE
461
- )
462
- num_segments = gr.Slider(
463
- minimum=2, maximum=10, step=1, value=5,
464
- label="Número de segmentos"
465
- )
466
 
467
- with gr.Accordion("INSTRUÇÕES EM LINGUAGEM NATURAL (IA)", open=True):
 
 
 
 
 
 
468
  natural_instructions = gr.Textbox(
469
- label="Suas instruções para a IA",
470
- placeholder='Exemplos:\n"Separe os 5 melhores momentos"\n"Recorte apenas a parte sobre medo"\n"Remova quando fala almôndega"',
471
- lines=4
472
  )
473
 
474
- with gr.Accordion("MINUTAGENS MANUAIS", open=False):
475
  manual_timecodes = gr.Textbox(
476
- label="Cole os timecodes exatos",
477
  placeholder="00:01:23:15 - 00:02:45:10\n00:05:30:00 - 00:07:15:22",
478
- lines=5
479
- )
480
-
481
- with gr.Accordion("Palavras-chave Personalizadas", open=False):
482
- custom_keywords = gr.Textbox(
483
- label="Palavras-chave (separadas por vírgula)",
484
- placeholder="transformação, resultado, método"
485
  )
486
 
487
- with gr.Accordion("Ajuste de Pesos", open=False):
 
488
  with gr.Row():
489
- weight_emotion = gr.Slider(0, 5, value=2.0, step=0.1, label="Emoção")
490
- weight_break = gr.Slider(0, 5, value=1.5, step=0.1, label="Quebra")
491
  with gr.Row():
492
- weight_learn = gr.Slider(0, 5, value=1.2, step=0.1, label="Aprendizado")
493
- weight_viral = gr.Slider(0, 5, value=1.0, step=0.1, label="Viral")
494
 
495
- run_btn = gr.Button("Processar e Gerar XML Editado", variant="primary", size="lg")
496
 
497
  with gr.Row():
498
  with gr.Column(scale=2):
499
- resumo_out = gr.Textbox(label="Resumo dos cortes", lines=15)
500
  with gr.Column(scale=1):
501
  status_out = gr.Textbox(label="Status")
502
- file_out = gr.File(label="Download do XML")
503
 
504
  run_btn.click(
505
- process_xml_and_transcript,
506
- inputs=[xml_in, txt_in, use_llm, num_segments, custom_keywords,
507
- manual_timecodes, natural_instructions, weight_emotion, weight_break, weight_learn, weight_viral],
 
508
  outputs=[resumo_out, file_out, status_out]
509
  )
510
 
 
2
  import re
3
  import xml.etree.ElementTree as ET
4
  from dataclasses import dataclass
5
+ from typing import List, Tuple
6
  import gradio as gr
7
 
8
+ # LLM Configuration
9
  USE_LLM_DEFAULT = True
10
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
11
 
12
  LLM_AVAILABLE = False
13
+ LLM_MODEL_NAME = "gemini-2.0-flash-exp"
14
  try:
15
  if GEMINI_API_KEY:
16
  import google.generativeai as genai
 
37
  text: str
38
  score: float
39
 
40
+ # ============ TIMECODE FUNCTIONS ============
41
  def parse_timecode_to_frames(tc: str, fps: int = FPS) -> int:
42
  m = re.match(r"^\s*(\d{2}):(\d{2}):(\d{2})[:;](\d{2})\s*$", tc)
43
  if not m:
 
54
  ff = rem % fps
55
  return f"{hh:02d}:{mm:02d}:{ss:02d}:{ff:02d}"
56
 
57
def frames_to_seconds(frames: int, fps: int = FPS) -> float:
    """Convert a frame count into seconds.

    Args:
        frames: Number of frames.
        fps: Frames per second used for the conversion; defaults to the
            module-level ``FPS``.

    Returns:
        ``frames`` divided by ``fps`` using true division.
    """
    seconds = frames / fps
    return seconds
59
+
60
def seconds_to_frames(seconds: float, fps: int = FPS) -> int:
    """Convert a duration in seconds into a whole number of frames.

    Args:
        seconds: Duration in seconds.
        fps: Frames per second used for the conversion; defaults to the
            module-level ``FPS``.

    Returns:
        ``seconds * fps`` truncated toward zero by ``int()``.
    """
    exact_frames = seconds * fps
    return int(exact_frames)
62
+
63
+ # ============ TRANSCRIPT PARSING ============
64
  def parse_transcript(txt: str) -> List[Segment]:
65
  lines = [l.strip() for l in txt.splitlines() if l.strip()]
66
  results: List[Segment] = []
67
+
68
  pat_range = re.compile(r"^\[?\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*[-—]\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*\]?\s+(.*)$")
69
  pat_point = re.compile(r"^(\d{2}:\d{2}:\d{2}[:;]\d{2})\s+(.*)$")
70
+
71
  for l in lines:
72
  m = pat_range.match(l)
73
  if m:
 
80
  except Exception:
81
  continue
82
  continue
83
+
84
  m = pat_point.match(l)
85
  if m:
86
  s, text = m.groups()
 
91
  results.append(Segment(s, e, s_f, e_f, text, 0.0))
92
  except Exception:
93
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
+ return results
 
 
96
 
97
+ # ============ MANUAL TIMECODES ============
98
+ def parse_manual_timecodes(manual_input: str) -> List[Tuple[str, str]]:
99
  manual_ranges = []
100
  normalized = manual_input.replace(",", "\n")
101
  lines = [l.strip() for l in normalized.splitlines() if l.strip()]
 
110
 
111
  return manual_ranges
112
 
113
+ # ============ SEGMENT PROCESSING ============
114
def get_total_duration(segs: List[Segment]) -> float:
    """Return the combined duration of ``segs`` in seconds.

    Each segment contributes ``(end_f - start_f) / FPS``; the per-segment
    values are then summed.
    """
    per_segment_seconds = ((seg.end_f - seg.start_f) / FPS for seg in segs)
    return sum(per_segment_seconds)
117
+
118
def create_target_selection(segs: List[Segment], target_minutes: float, strategy: str = "distributed") -> List[Segment]:
    """Pick a subset of ``segs`` whose total duration approximates a target.

    Args:
        segs: Candidate segments, in the order they should be considered.
        target_minutes: Desired total duration, in minutes.
        strategy: ``"distributed"`` spreads the selection over the whole
            list; any other value (conventionally ``"sequential"``) takes
            segments from the start until the target is reached.

    Returns:
        The selected segments, in their original relative order. When the
        target exceeds the available duration, ``segs`` itself is returned
        after printing a warning. A non-positive target yields ``[]``.
    """
    target_seconds = target_minutes * 60
    total_available = get_total_duration(segs)

    if target_seconds > total_available:
        print(f"Aviso: Duração solicitada ({target_minutes:.1f}min) maior que disponível ({total_available/60:.1f}min)")
        return segs

    # Nothing to pick; this also protects the ratio below from dividing by zero.
    if target_seconds <= 0 or total_available <= 0:
        return []

    selected: List[Segment] = []

    if strategy == "distributed":
        # Proportional sampling: after each segment, the kept duration should
        # track ``ratio`` times the duration seen so far. A segment is kept
        # only when doing so leaves the running total no more than half a
        # segment above that quota, so picks are spread over the entire list
        # instead of being exhausted at the beginning.
        # If every segment is much longer than the target, nothing is kept.
        ratio = target_seconds / total_available
        elapsed = 0.0
        kept = 0.0
        for seg in segs:
            seg_duration = (seg.end_f - seg.start_f) / FPS
            elapsed += seg_duration
            if kept + seg_duration / 2 <= elapsed * ratio:
                selected.append(seg)
                kept += seg_duration
        return selected

    # Sequential: take segments from the start until the target is covered.
    current_duration = 0.0
    for seg in segs:
        if current_duration >= target_seconds:
            break
        selected.append(seg)
        current_duration += (seg.end_f - seg.start_f) / FPS
    return selected
158
+
159
def merge_close_segments(segs: List[Segment], max_gap_seconds: float = 3.0) -> List[Segment]:
    """Merge segments that touch, overlap, or are separated by a small gap.

    Segments are sorted by ``start_f``. A segment is merged into the
    previous result when the gap between them is at most
    ``max_gap_seconds`` (converted to frames with ``FPS``). Overlapping
    segments (negative gap) are merged too, so the same footage is not
    emitted twice.

    Args:
        segs: Segments to merge; the input list is not modified.
        max_gap_seconds: Largest gap, in seconds, that still triggers a merge.

    Returns:
        A new list of segments ordered by start frame. A merged segment
        spans from the first start to the furthest end, joins the texts
        with ``" [...] "``, and averages the two scores.
    """
    if not segs:
        return []

    ordered = sorted(segs, key=lambda seg: seg.start_f)
    max_gap_frames = int(max_gap_seconds * FPS)
    merged = [ordered[0]]

    for current in ordered[1:]:
        last = merged[-1]
        gap = current.start_f - last.end_f

        if gap > max_gap_frames:
            merged.append(current)
            continue

        # When ``current`` lies entirely inside ``last``, keep the later end
        # so the merged span never shrinks.
        if current.end_f >= last.end_f:
            end_f, end_tc = current.end_f, current.end_tc
        else:
            end_f, end_tc = last.end_f, last.end_tc

        merged[-1] = Segment(
            start_tc=last.start_tc,
            end_tc=end_tc,
            start_f=last.start_f,
            end_f=end_f,
            text=last.text + " [...] " + current.text,
            score=(last.score + current.score) / 2,
        )

    return merged
186
+
187
+ # ============ AI PROCESSING ============
188
def extract_duration_from_instructions(instructions: str) -> "float | None":
    """Extract a duration in minutes from free-form user instructions.

    Recognizes forms such as "10 minutos", "5 min", "15 minutes" and "8m",
    case-insensitively. Decimal values written with a dot or a comma
    ("1.5 min", "1,5 minutos") are parsed as a single number rather than
    only their fractional digits.

    Args:
        instructions: Text typed by the user.

    Returns:
        The duration in minutes, or ``None`` when no duration is mentioned.
    """
    text = instructions.lower()
    # Integer with an optional decimal part; "," is accepted as the decimal
    # separator because the instructions are usually written in Portuguese.
    number = r"(\d+(?:[.,]\d+)?)"
    patterns = [
        number + r"\s*minutos?",
        number + r"\s*min\b",
        number + r"\s*minutes?",
        number + r"m\b",
    ]

    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return float(match.group(1).replace(",", "."))

    return None
204
+
205
def ai_select_segments(segs: List[Segment], instructions: str) -> List[Segment]:
    """Use the LLM to select segments that satisfy the user's instructions.

    Segments are summarized in groups of five to keep the prompt small, and
    the model is asked to answer with group numbers. Only numbers that refer
    to a group actually shown in the prompt are accepted. The chosen groups
    are expanded back into segments, de-duplicated, sorted by start frame,
    trimmed to the requested duration (when the instructions mention one),
    and finally merged when they are close together.

    Args:
        segs: Parsed transcript segments.
        instructions: Natural-language instructions typed by the user.

    Returns:
        The final list of segments, ordered by start frame.

    Raises:
        ValueError: If the LLM is not available.
        Exception: Any error raised during the LLM call or post-processing
            is printed and re-raised unchanged.
    """
    if not LLM_AVAILABLE:
        raise ValueError("IA não disponível. Configure GEMINI_API_KEY")

    group_size = 5
    max_groups_in_prompt = 50

    total_duration_min = get_total_duration(segs) / 60
    target_duration = extract_duration_from_instructions(instructions)

    # One summary line per group of segments (smaller prompt).
    segment_summary = []
    for i in range(0, len(segs), group_size):
        group = segs[i:i + group_size]
        start_tc = group[0].start_tc
        end_tc = group[-1].end_tc
        duration = sum((s.end_f - s.start_f) / FPS for s in group)
        combined_text = " ".join([s.text[:100] for s in group])
        segment_summary.append(f"Grupo {i // group_size}: [{start_tc}-{end_tc}] ({duration:.0f}s) {combined_text[:200]}")

    shown_groups = segment_summary[:max_groups_in_prompt]

    prompt = f"""Você é um editor de vídeo profissional.

INSTRUÇÕES DO USUÁRIO:
{instructions}

INFORMAÇÕES:
- Total disponível: {total_duration_min:.1f} minutos ({len(segs)} segmentos)
- Duração alvo detectada: {target_duration if target_duration else 'não especificada'} minutos

SEGMENTOS (agrupados de 5 em 5):
{chr(10).join(shown_groups)}

TAREFA:
1. Identifique quais GRUPOS de segmentos atendem às instruções
2. Se foi solicitada duração específica, selecione grupos suficientes para atingi-la
3. Distribua a seleção: pegue grupos do INÍCIO, MEIO e FIM do vídeo
4. Retorne os NÚMEROS dos grupos selecionados

RESPONDA APENAS com números separados por vírgula (ex: 0,2,5,8,12,15,20,25,30)
Selecione pelo menos 10-20 grupos para ter duração adequada."""

    try:
        response = LLM.generate_content(prompt, generation_config={"temperature": 0.4, "max_output_tokens": 500})
        txt = (response.text or "").strip()

        # Keep each group number once, and only if that group was actually
        # shown to the model; anything else is a hallucinated index.
        requested = [int(x) for x in re.findall(r"\d+", txt)]
        group_indices = [g for g in dict.fromkeys(requested) if g < len(shown_groups)]

        # Expand groups back into individual segments.
        selected_segs = []
        for group_idx in group_indices:
            start_idx = group_idx * group_size
            selected_segs.extend(segs[start_idx:start_idx + group_size])

        if not selected_segs:
            # Fallback: an evenly spaced sample of roughly 30 segments.
            step = max(1, len(segs) // 30)
            selected_segs = segs[::step]

        # Drop duplicate time ranges, then order chronologically.
        seen = set()
        unique_segs = []
        for seg in selected_segs:
            key = (seg.start_f, seg.end_f)
            if key not in seen:
                seen.add(key)
                unique_segs.append(seg)

        unique_segs.sort(key=lambda x: x.start_f)

        # Trim to the requested duration when one was detected.
        if target_duration:
            unique_segs = create_target_selection(unique_segs, target_duration, "distributed")

        # Join neighbouring segments into continuous cuts.
        final_segs = merge_close_segments(unique_segs, max_gap_seconds=3.0)

        final_duration = get_total_duration(final_segs)
        print(f"✓ Selecionados {len(final_segs)} trechos, duração total: {final_duration/60:.1f} min")

        return final_segs

    except Exception as e:
        print(f"Erro na IA: {e}")
        raise
290
 
291
+ # ============ KEYWORD SCORING ============
292
def keyword_score(text: str, custom_keywords: str = "", weights: dict = None) -> float:
    """Score a piece of transcript text by keyword hits.

    Each built-in keyword found in the lower-cased text adds the weight of
    its category. Each non-empty custom keyword found adds 3.0, and every
    "!" in the original text adds 0.2.

    Args:
        text: Transcript text to score.
        custom_keywords: Comma-separated extra keywords. Blank entries (for
            example from a trailing comma) are ignored.
        weights: Optional mapping with any of the keys ``"emotion"``,
            ``"break"``, ``"learn"`` and ``"viral"``. Missing keys fall back
            to the defaults 2.0, 1.5, 1.2 and 1.0.

    Returns:
        The accumulated score.
    """
    effective = {"emotion": 2.0, "break": 1.5, "learn": 1.2, "viral": 1.0}
    if weights:
        effective.update(weights)

    lowered = text.lower()
    categories = (
        ("emotion", ["medo", "coragem", "raiva", "chorei", "feliz", "triste", "emocion"]),
        ("break", ["nunca", "de repente", "contraintuitivo", "virada"]),
        ("learn", ["aprendi", "descobri", "lição", "entendi", "percebi"]),
        ("viral", ["segredo", "verdade", "3 passos", "como eu"]),
    )

    score = 0.0
    for category, keywords in categories:
        weight = effective[category]
        for kw in keywords:
            if kw in lowered:
                score += weight

    for raw_kw in (custom_keywords or "").split(","):
        kw = raw_kw.strip().lower()
        # An empty string is a substring of every text, so it must be skipped
        # or every segment would receive the custom-keyword bonus.
        if kw and kw in lowered:
            score += 3.0

    score += 0.2 * text.count("!")
    return score
315
 
316
+ # ============ MAIN SELECTION LOGIC ============
317
  def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
318
  custom_keywords: str, manual_timecodes: str, natural_instructions: str,
319
  weight_emotion: float, weight_break: float,
320
  weight_learn: float, weight_viral: float) -> List[Segment]:
321
 
322
+ # Priority 1: Manual timecodes
323
  manual_ranges = parse_manual_timecodes(manual_timecodes)
324
  if manual_ranges:
325
  result_segs = []
 
329
  end_f = parse_timecode_to_frames(end_tc)
330
  if end_f > start_f:
331
  result_segs.append(Segment(
332
+ start_tc=start_tc, end_tc=end_tc,
333
+ start_f=start_f, end_f=end_f,
334
+ text=f"Manual: {start_tc} - {end_tc}",
 
 
335
  score=100.0
336
  ))
337
  except Exception as e:
338
+ print(f"Erro: {e}")
339
+ return result_segs if result_segs else []
 
 
 
 
 
 
 
 
340
 
341
+ # Priority 2: AI with natural instructions
342
  segs = parse_transcript(transcript_txt)
343
  if not segs:
344
+ raise ValueError("Nenhum trecho encontrado na transcrição")
345
 
346
+ if natural_instructions.strip() and use_llm and LLM_AVAILABLE:
347
+ return ai_select_segments(segs, natural_instructions)
348
 
349
+ # Priority 3: Automatic scoring
350
+ weights = {
351
+ "emotion": weight_emotion,
352
+ "break": weight_break,
353
+ "learn": weight_learn,
354
+ "viral": weight_viral
355
+ }
356
 
357
+ for s in segs:
358
+ s.score = keyword_score(s.text, custom_keywords, weights)
 
359
 
360
+ segs.sort(key=lambda x: x.score, reverse=True)
361
+ return segs[:num_segments]
362
 
363
+ # ============ XML EDITING ============
364
def get_sequence(root: ET.Element) -> ET.Element:
    """Return the first ``<sequence>`` element of the document.

    Args:
        root: Root element of the parsed XML.

    Returns:
        ``root`` itself when it is a ``<sequence>``; otherwise the first
        ``<sequence>`` descendant in document order.

    Raises:
        ValueError: If the document contains no ``<sequence>`` element.
    """
    # ".//sequence" only looks at descendants, so a document whose root is
    # the sequence has to be handled explicitly.
    if root.tag == "sequence":
        return root

    seq = root.find(".//sequence")
    if seq is None:
        raise ValueError("Nenhuma <sequence> encontrada")
    return seq
369
 
370
  def ensure_rate_24fps(element: ET.Element):
 
375
  if tb is None:
376
  tb = ET.SubElement(rate, "timebase")
377
  tb.text = str(FPS)
 
 
 
 
378
 
379
  def deep_copy(elem: ET.Element) -> ET.Element:
380
  new = ET.Element(elem.tag, attrib=elem.attrib)
 
384
  new.append(deep_copy(child))
385
  return new
386
 
387
+ def build_clipitem(template_ci: ET.Element, cid: str, start_f: int, end_f: int,
388
+ in_f: int, out_f: int, linked_ids):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
  ci = ET.Element("clipitem", {"id": cid})
390
  name = template_ci.find("name")
391
  ci_name = ET.SubElement(ci, "name")
 
399
  t = ET.SubElement(ci, tag)
400
  t.text = str(val)
401
 
402
+ src_file = template_ci.find("./file")
403
+ if src_file is not None:
404
+ ci.append(deep_copy(src_file))
405
 
406
  for lid in linked_ids:
407
  link = ET.SubElement(ci, "link")
408
  linkclipref = ET.SubElement(link, "linkclipref")
409
  linkclipref.text = lid
410
+
 
411
  return ci
412
 
413
def edit_sequence_with_segments(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
    """Rebuild the first video and audio tracks of the sequence from ``segs``.

    The first ``clipitem`` of each track is used as a template. All existing
    clipitems on both tracks are removed, then one linked video/audio
    clipitem pair is appended per segment. Pairs are laid back-to-back
    starting at frame 0, with source in/out points taken from the segment.

    Args:
        tree: Parsed XML tree; it is modified in place.
        segs: Segments to place on the timeline, in order.

    Returns:
        The same ``tree`` object.

    Raises:
        ValueError: If no sequence is found (raised by ``get_sequence``),
            if the video or audio track is missing, or if either track has
            no clipitem to use as a template.
    """
    root = tree.getroot()
    seq = get_sequence(root)

    video_track = seq.find("./media/video/track")
    audio_track = seq.find("./media/audio/track")

    # Compare against None explicitly: an Element's truth value reflects
    # whether it has children, not whether it was found.
    if video_track is None or audio_track is None:
        raise ValueError("Estrutura de trilhas não encontrada")

    v_tpl = video_track.find("./clipitem")
    a_tpl = audio_track.find("./clipitem")
    if v_tpl is None or a_tpl is None:
        raise ValueError("Não há clipitem de referência em V1 e/ou A1.")

    # Clear both tracks; findall returns a list, so removal during the loop is safe.
    for track in (video_track, audio_track):
        for ci in track.findall("./clipitem"):
            track.remove(ci)

    # Append the new clips back-to-back.
    cursor = 0
    for idx, s in enumerate(segs, start=1):
        dur = s.end_f - s.start_f
        start, end = cursor, cursor + dur

        v_id = f"clip-v-{idx}"
        a_id = f"clip-a-{idx}"

        v_ci = build_clipitem(v_tpl, v_id, start, end, s.start_f, s.end_f, [a_id])
        a_ci = build_clipitem(a_tpl, a_id, start, end, s.start_f, s.end_f, [v_id])

        video_track.append(v_ci)
        audio_track.append(a_ci)

        cursor = end

    return tree
450
 
451
+ # ============ GRADIO INTERFACE ============
452
def process_xml_and_transcript(xml_file, txt_file, use_llm, num_segments,
                               custom_keywords, manual_timecodes, natural_instructions,
                               weight_emotion, weight_break, weight_learn, weight_viral):
    """Handle the "process" button: select segments and write the edited XML.

    The mode is chosen from the inputs, in this order of precedence:
    manual timecodes (if any parse), natural-language instructions (requires a
    transcript and an available LLM), otherwise the automatic mode (requires a
    transcript).

    Args:
        xml_file: Uploaded XML file object (its ``.name`` is the path on disk).
        txt_file: Uploaded transcript file object, or a falsy value if absent.
        use_llm: Whether the user asked to use the LLM.
        num_segments: Requested number of segments, forwarded to
            ``select_segments``.
        custom_keywords: User keywords, forwarded to ``select_segments``.
        manual_timecodes: Raw text with manual timecode ranges.
        natural_instructions: Free-text instructions for the LLM.
        weight_emotion, weight_break, weight_learn, weight_viral: Scoring
            weights, forwarded to ``select_segments``.

    Returns:
        A ``(summary, output_path, status)`` tuple. ``output_path`` is ``None``
        whenever no file was produced (missing input, no segments, or error).
    """
    llm_status = f"LLM: {LLM_AVAILABLE}"

    if not xml_file:
        return "Envie o XML do Premiere", None, llm_status

    # A cleared textbox may arrive as None; treat it as empty text so the
    # handler reports a normal status instead of raising on .strip().
    natural_instructions = natural_instructions or ""

    manual_ranges = parse_manual_timecodes(manual_timecodes)
    has_instructions = natural_instructions.strip() != ""

    # Determine mode (and whether the transcript has to be read at all).
    if manual_ranges:
        mode = "MANUAL"
        needs_transcript = False
    elif has_instructions:
        mode = "IA (Linguagem Natural)"
        if not txt_file:
            return " Envie a transcrição para usar IA", None, llm_status
        if not LLM_AVAILABLE:
            return " IA não disponível. Configure GEMINI_API_KEY", None, "LLM: False"
        needs_transcript = True
    else:
        mode = "AUTOMÁTICO"
        if not txt_file:
            return "Envie a transcrição", None, llm_status
        needs_transcript = True

    # Top-level UI boundary: any failure below is reported to the user as text
    # rather than propagated.
    try:
        transcript = ""
        if needs_transcript:
            # Read inside the try so I/O and decoding errors are reported too.
            with open(txt_file.name, "r", encoding="utf-8") as f:
                transcript = f.read()

        segs = select_segments(transcript, use_llm and LLM_AVAILABLE, num_segments,
                               custom_keywords, manual_timecodes, natural_instructions,
                               weight_emotion, weight_break, weight_learn, weight_viral)

        if not segs:
            return "❌ Nenhum segmento selecionado", None, llm_status

        tree = ET.parse(xml_file.name)
        tree = edit_sequence_with_segments(tree, segs)

        base = os.path.splitext(os.path.basename(xml_file.name))[0]
        out_path = os.path.join(OUTPUT_DIR, f"{base}_EDITADO.xml")
        tree.write(out_path, encoding="utf-8", xml_declaration=True)

        total_duration = get_total_duration(segs)

        # Build the summary as a list of pieces and join once at the end.
        parts = [f"✂️ {len(segs)} cortes | Duração: {total_duration/60:.1f} min | Modo: {mode}\n\n"]
        for i, s in enumerate(segs, 1):
            dur = (s.end_f - s.start_f) / FPS
            parts.append(f"{i}. {s.start_tc} → {s.end_tc} ({dur:.1f}s)\n")
            if s.text and not manual_ranges:
                # Only show an ellipsis when the text was actually cut.
                suffix = "..." if len(s.text) > 120 else ""
                parts.append(f" {s.text[:120]}{suffix}\n")
            parts.append("\n")
        resumo = "".join(parts)

        status = f"✓ {mode} | Duração total: {total_duration/60:.1f} min | LLM: {LLM_AVAILABLE}"
        return resumo, out_path, status

    except Exception as e:
        return f"❌ Erro: {e}", None, llm_status
510
 
511
+ # ============ CSS ============
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
  css = """
513
  :root {
514
+ --primary: #39FF14;
515
+ --text: #1a1a1a;
516
+ --muted: #6b7280;
 
517
  }
 
518
  .gradio-container {
519
+ font-family: system-ui, sans-serif !important;
 
520
  }
521
+ .gradio-container h1, .gradio-container label {
522
+ color: var(--text) !important;
 
 
523
  }
 
524
  .gradio-container button.primary {
525
+ background: var(--primary) !important;
526
  color: #000 !important;
527
+ font-weight: 700 !important;
 
 
 
 
 
 
 
 
 
 
528
  }
529
  """
530
 
531
+ # ============ GRADIO APP ============
532
  with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
533
+ gr.Markdown("# Agente de Edição XML - Premiere Pro")
534
+ gr.Markdown("Edite sequências do Premiere com IA ou controle manual")
 
 
 
 
 
535
 
536
  with gr.Row():
537
+ xml_in = gr.File(label="XML do Premiere", file_types=[".xml"])
538
+ txt_in = gr.File(label="Transcrição (.txt)", file_types=[".txt"])
539
+
540
+ with gr.Row():
541
+ use_llm = gr.Checkbox(label="Usar IA", value=USE_LLM_DEFAULT and LLM_AVAILABLE)
542
+ num_segments = gr.Slider(2, 20, 5, step=1, label="Segmentos (modo automático)")
 
 
 
 
 
 
 
543
 
544
+ with gr.Accordion("IA - Linguagem Natural (RECOMENDADO)", open=True):
545
+ gr.Markdown("""
546
+ **Use linguagem natural para dar instruções:**
547
+ - "Crie um corte de 10 minutos com os melhores momentos"
548
+ - "Extraia 15 minutos das partes mais engraçadas"
549
+ - "Faça um resumo de 5 minutos sobre superação"
550
+ """)
551
  natural_instructions = gr.Textbox(
552
+ label="Instruções para a IA",
553
+ placeholder='Ex: "Crie um corte de 10 minutos com os melhores momentos distribuídos pelo vídeo"',
554
+ lines=3
555
  )
556
 
557
+ with gr.Accordion("Minutagens Manuais", open=False):
558
  manual_timecodes = gr.Textbox(
559
+ label="Timecodes exatos (um por linha)",
560
  placeholder="00:01:23:15 - 00:02:45:10\n00:05:30:00 - 00:07:15:22",
561
+ lines=4
 
 
 
 
 
 
562
  )
563
 
564
+ with gr.Accordion("Modo Automático (Palavras-chave)", open=False):
565
+ custom_keywords = gr.Textbox(label="Palavras-chave personalizadas (separadas por vírgula)")
566
  with gr.Row():
567
+ weight_emotion = gr.Slider(0, 5, 2.0, 0.1, label="Peso: Emoção")
568
+ weight_break = gr.Slider(0, 5, 1.5, 0.1, label="Peso: Quebra")
569
  with gr.Row():
570
+ weight_learn = gr.Slider(0, 5, 1.2, 0.1, label="Peso: Aprendizado")
571
+ weight_viral = gr.Slider(0, 5, 1.0, 0.1, label="Peso: Viral")
572
 
573
+ run_btn = gr.Button("Processar XML", variant="primary", size="lg")
574
 
575
  with gr.Row():
576
  with gr.Column(scale=2):
577
+ resumo_out = gr.Textbox(label="Resumo", lines=15)
578
  with gr.Column(scale=1):
579
  status_out = gr.Textbox(label="Status")
580
+ file_out = gr.File(label="Download")
581
 
582
  run_btn.click(
583
+ process_xml_and_transcript,
584
+ inputs=[xml_in, txt_in, use_llm, num_segments, custom_keywords,
585
+ manual_timecodes, natural_instructions,
586
+ weight_emotion, weight_break, weight_learn, weight_viral],
587
  outputs=[resumo_out, file_out, status_out]
588
  )
589