Spaces:

leicam
/

EditorAutomaticoXML

Running

App Files Files Community

leicam committed on Oct 7, 2025

Commit

b561d7a

verified ·

1 Parent(s): 8769bc9

Update app.py

Browse files

Files changed (1) hide show

app.py +506 -241

app.py CHANGED Viewed

@@ -50,12 +50,7 @@ class Segment:
 # Funções de Timecode
 # =========================
 def _tc_to_hmsf(tc: str, fps: int = FPS) -> Tuple[int, int, int, int]:
-    """
-    Converte timecode para (hh, mm, ss, ff). Aceita:
-    - HH:MM:SS:FF ou HH:MM:SS;FF
-    - HH:MM:SS[.,]mmm (milissegundos)
-    - H:MM:SS (sem frames)
-    """
     s = tc.strip()
     # HH:MM:SS:FF ou HH:MM:SS;FF
@@ -102,17 +97,7 @@ def frames_to_timecode(frames: int, fps: int = FPS) -> str:
 # Parser de Transcrição
 # =========================
 def parse_transcript(txt: str) -> List[Segment]:
-    """
-    Aceita múltiplos formatos:
-    A) Uma linha:  00:00:00:00 - 00:00:10:00 Texto...
-    B) Duas linhas: 00:00:00:00 - 00:00:10:00 \n Texto...
-    C) SRT/VTT com setas:
-       1
-       00:00:05,120 --> 00:00:08,300
-       Texto linha 1
-       Texto linha 2
-       [linha em branco]
-    """
     if not txt or not txt.strip():
         return []
@@ -135,7 +120,7 @@ def parse_transcript(txt: str) -> List[Segment]:
             i += 1
             continue
-        # Casos A e B (com traço)
         m = line_range.match(raw)
         if m:
             start_tc, end_tc, trailing_text = m.groups()
@@ -144,7 +129,6 @@ def parse_transcript(txt: str) -> List[Segment]:
             if trailing_text.strip():
                 text_parts.append(trailing_text.strip())
             else:
-                # Texto nas linhas seguintes até linha em branco ou novo bloco
                 j = i + 1
                 while j < len(lines):
                     nxt = lines[j].strip()
@@ -152,9 +136,9 @@ def parse_transcript(txt: str) -> List[Segment]:
                         break
                     if line_range.match(nxt):
                         break
-                    if re.match(r'^\d+\s*$', nxt):  # índice SRT
                         break
-                    if arrow.search(nxt):          # linha SRT com -->
                         break
                     text_parts.append(nxt)
                     j += 1
@@ -178,9 +162,8 @@ def parse_transcript(txt: str) -> List[Segment]:
             i += 1
             continue
-        # Caso C (SRT/VTT com -->)
         if arrow.search(raw) or (i + 1 < len(lines) and arrow.search(lines[i + 1])):
-            # Se a linha atual não tem arrow, tente a próxima (muitos SRTs têm um índice numérico antes)
             line_with_tc = raw if arrow.search(raw) else lines[i + 1]
             mm = arrow.search(line_with_tc)
             if mm:
@@ -191,7 +174,6 @@ def parse_transcript(txt: str) -> List[Segment]:
                     nxt = lines[j].strip()
                     if not nxt:
                         break
-                    # próximo bloco: índice seguido de timecode
                     if re.match(r'^\d+\s*$', nxt) and (j + 1 < len(lines) and arrow.search(lines[j + 1])):
                         break
                     if arrow.search(nxt):
@@ -215,7 +197,6 @@ def parse_transcript(txt: str) -> List[Segment]:
                 except Exception:
                     pass
-                # Avança o ponteiro para depois do bloco
                 i = j + 1
                 continue
@@ -242,241 +223,410 @@ def parse_manual_timecodes(manual_input: str) -> List[Tuple[str, str]]:
 # =========================
-# Interpretação do Comando (NLP simples)
 # =========================
 @dataclass
 class CommandSpec:
-    total_segments: int            # quantidade de cortes
-    per_segment_seconds: Optional[int]  # duração por corte (segundos), se especificada
-    total_minutes: Optional[float] # duração total (minutos), alternativa ao per_segment_seconds
-    start_timecode: Optional[str]  # início explícito
-    keywords: List[str]            # termos para achar o começo
-    use_best_moments: bool         # flag para "melhores momentos"
 def parse_natural_command(text: str) -> CommandSpec:
-    """
-    Extrai:
-      - quantidade de cortes: "3 cortes", "crie 2"
-      - duração por corte: "cortes de 30s", "clipes de 1min", "1 minuto"
-      - duração total: "corte de 10 minutos", "15min", "faça 5 minutos"
-      - timecode de início: "começando em 00:02:10:00" ou "a partir de 00:02:10,500"
-      - palavras-chave: "sobre X", "da parte do X", "tema X", "palavra X"
-      - melhores momentos: presença de "melhores momentos"
-    Regras:
-      - se per_segment_seconds e total_minutes vierem juntos, prioriza per_segment_seconds (mais específico)
-      - caso apenas total_minutes: cria 1 corte dessa duração (ou divide pelos 'total_segments' se quantidade também vier)
-    """
     s = text.strip().lower()
-    # quantidade de cortes
     count = 1
-    m = re.search(r'(\d+)\s*(?:cortes?|clipes?)\b', s)
-    if m:
-        count = max(1, int(m.group(1)))
-    else:
-        m = re.search(r'\bcrie\s+(\d+)\b', s)
         if m:
             count = max(1, int(m.group(1)))
-    # duração por corte (segundos)
     per_seg_sec = None
-    m = re.search(r'(\d+)\s*(?:segundos?|s)\b', s)
-    if m:
-        per_seg_sec = int(m.group(1))
-    else:
-        # "de 30s", "30 s", etc.
-        m = re.search(r'de\s+(\d+)\s*s\b', s)
         if m:
             per_seg_sec = int(m.group(1))
-    # duração por corte em minutos -> segundos
     if per_seg_sec is None:
-        m = re.search(r'(\d+)\s*(?:minutos?|min)\b', s)
-        if m:
-            per_seg_sec = int(m.group(1)) * 60
-        else:
-            # "de 1min"
-            m = re.search(r'de\s+(\d+)\s*min\b', s)
             if m:
-                per_seg_sec = int(m.group(1)) * 60
-    # duração total (minutos)
     total_min = None
-    # expressões como "corte de 10 minutos", "faça 5 minutos", "crie 15min"
-    m = re.search(r'\b(?:corte|faça|faca|crie|criar|gerar|make|montar)\b.*?(\d+)\s*(?:minutos?|min)\b', s)
-    if m:
-        total_min = float(m.group(1))
-    else:
-        m = re.search(r'\b(\d+)\s*(?:minutos?|min)\b', s)
         if m:
             total_min = float(m.group(1))
-    # timecode de início explícito
-    m = re.search(r'(?:começando|comecando|a partir de|starting at|start at)\s*(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)', s)
-    start_tc = m.group(1) if m else None
-    # palavras-chave depois de "sobre", "da parte do", "tema", "assunto"
     kw = []
-    kw_match = re.search(r'(?:sobre|da parte do|tema|assunto)\s+(.+)', s)
-    if kw_match:
-        # pega o resto da frase e quebra por vírgula
-        tail = kw_match.group(1)
-        kw = [t.strip() for t in re.split(r'[,\.;/]', tail) if t.strip()]
-    # flag de "melhores momentos"
-    best = bool(re.search(r'melhores momentos', s))
     return CommandSpec(
         total_segments=count,
         per_segment_seconds=per_seg_sec,
         total_minutes=total_min,
         start_timecode=start_tc,
         keywords=kw,
-        use_best_moments=best
     )
 # =========================
-# Utilidades de seleção
 # =========================
-def find_keyword_in_segments(segs: List[Segment], keywords: List[str]) -> int:
     if not segs or not keywords:
-        return 0
-    best_idx, best_score = 0, -1
     for idx, seg in enumerate(segs):
         text_lower = seg.text.lower()
-        score = sum(1 for kw in keywords if kw.lower() in text_lower)
         if score > best_score:
             best_idx, best_score = idx, score
-    return best_idx
 def create_continuous_segment_from(start_frame: int, duration_frames: int, segs_preview: List[Segment]) -> Segment:
     end_frame = max(start_frame + duration_frames, start_frame + 1)
-    # preview opcional do texto
     text_parts = []
-    for seg in segs_preview[:10]:
-        if seg.text:
-            text_parts.append(seg.text[:80])
-    combined = " ".join(text_parts)[:300]
     return Segment(
         start_tc=frames_to_timecode(start_frame),
         end_tc=frames_to_timecode(end_frame),
         start_f=start_frame,
         end_f=end_frame,
-        text=("Corte contínuo: " + combined) if combined else "Corte contínuo",
         score=100.0
     )
-def process_with_command(
-    segs: List[Segment],
-    command: str,
-    use_llm: bool
-) -> List[Segment]:
-    """
-    Processa instruções naturais. Funciona com ou sem transcrição:
-      - sem transcrição: cria cortes contínuos a partir do timecode (ou 00:00)
-      - com transcrição: usa keywords/LLM para achar início e criar cortes
-    Regras de duração:
-      - se per_segment_seconds for fornecido -> aplica em cada corte
-      - do contrário, se total_minutes e total_segments > 1 -> divide igualmente
-      - se apenas total_minutes -> 1 corte com essa duração
-      - default se nada especificado -> 1 corte de 60s
-    """
     spec = parse_natural_command(command)
-    # Determinar duração por corte (segundos)
     if spec.per_segment_seconds:
         per_seg_seconds = spec.per_segment_seconds
         total_segments = max(1, spec.total_segments)
-    elif spec.total_minutes and spec.total_segments and spec.total_segments > 1:
-        total_seconds = int(spec.total_minutes * 60)
-        total_segments = spec.total_segments
-        per_seg_seconds = max(1, total_seconds // total_segments)
     elif spec.total_minutes:
-        per_seg_seconds = int(spec.total_minutes * 60)
-        total_segments = 1
     else:
         per_seg_seconds = 60
         total_segments = max(1, spec.total_segments)
-    # Determinar ponto de início (frame)
     start_frame = 0
     if spec.start_timecode:
         try:
             start_frame = parse_timecode_to_frames(spec.start_timecode)
         except Exception:
-            start_frame = 0
-    # Se houver transcrição, tentar achar índice inicial por palavra-chave/LLM
-    start_idx = None
-    if segs:
-        if spec.keywords:
-            start_idx = find_keyword_in_segments(segs, spec.keywords)
-        if use_llm and LLM_AVAILABLE and segs:
-            try:
-                # prepara um preview leve de 80 segmentos (índice|tc|texto)
-                preview = []
-                for i, s in enumerate(segs[:80]):
-                    preview.append(f"{i}|{s.start_tc}|{(s.text or '')[:60]}")
-                preview_text = "\n".join(preview)
-                prompt = f"""Encontre o índice inicial do assunto solicitado, retornando apenas o número (ex: 42).
-BUSCAR: {' '.join(spec.keywords[:5]) or '(sem keywords)'}
-SEGMENTOS (índice|timecode|texto):
-{preview_text}
-"""
-                response = LLM.generate_content(
-                    prompt,
-                    generation_config={"temperature": 0.1, "max_output_tokens": 20}
-                )
-                text = (response.text or "").strip()
-                m = re.search(r'\b(\d+)\b', text)
-                if m:
-                    idx = int(m.group(1))
-                    if 0 <= idx < len(segs):
-                        start_idx = idx
-            except Exception:
-                pass
-    # Construir cortes
     segments_out: List[Segment] = []
     if not segs:
-        # Sem transcrição: cortes contínuos a partir do timecode (ou zero)
-        base_frame = start_frame
         for _ in range(total_segments):
             duration_frames = int(per_seg_seconds * FPS)
             seg = create_continuous_segment_from(base_frame, duration_frames, [])
             segments_out.append(seg)
             base_frame = seg.end_f
         return segments_out
     # Com transcrição
-    # Determina start_frame baseado em start_idx ou em timecode explícito
-    if start_idx is not None and 0 <= start_idx < len(segs):
-        start_frame = segs[start_idx].start_f
-    # Se já havia start_timecode, preserva; se não, usa 0 como fallback
-    base_frame = max(0, start_frame)
-    for _ in range(total_segments):
         duration_frames = int(per_seg_seconds * FPS)
-        # usa preview de texto para descrição
-        seg_preview = segs[start_idx:start_idx + 10] if (start_idx is not None) else segs[:10]
         seg = create_continuous_segment_from(base_frame, duration_frames, seg_preview)
         segments_out.append(seg)
         base_frame = seg.end_f
     return segments_out
 # =========================
-# Modo Automático (score simples)
 # =========================
 def auto_score_segments(
     segs: List[Segment],
@@ -487,32 +637,57 @@ def auto_score_segments(
     weight_learn: float,
     weight_viral: float
 ) -> List[Segment]:
     for s in segs:
         score = 0.0
         text = (s.text or "").lower()
-        if "medo" in text or "coragem" in text:
-            score += weight_emotion
-        if "nunca" in text or "de repente" in text:
-            score += weight_break
-        if "aprendi" in text or "descobri" in text:
-            score += weight_learn
-        if "segredo" in text or "verdade" in text:
-            score += weight_viral
         if custom_keywords:
             for kw in custom_keywords.split(","):
-                if kw.strip().lower() in text:
-                    score += 3.0
         s.score = score
     segs.sort(key=lambda x: x.score, reverse=True)
     return segs[:num_segments]
 # =========================
-# Edição de XML (Premiere)
 # =========================
 def deep_copy_element(elem: ET.Element) -> ET.Element:
     new = ET.Element(elem.tag, attrib=dict(elem.attrib))
@@ -537,13 +712,11 @@ def edit_xml(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
     v_template = v_track.find("./clipitem")
     a_template = a_track.find("./clipitem")
-    # Limpa clips existentes
     for clip in list(v_track.findall("./clipitem")):
         v_track.remove(clip)
     for clip in list(a_track.findall("./clipitem")):
         a_track.remove(clip)
-    # Adiciona novos clips
     timeline_pos = 0
     for i, seg in enumerate(segs, 1):
         duration = seg.end_f - seg.start_f
@@ -623,10 +796,10 @@ def select_segments(
                 pass
         return result
-    # 2) Parser de transcrição (se houver)
     segs = parse_transcript(transcript_txt) if transcript_txt else []
-    # 3) Linguagem natural (sempre permitido; funciona com ou sem transcrição)
     if natural_instructions.strip():
         return process_with_command(segs, natural_instructions, use_llm and LLM_AVAILABLE)
@@ -648,18 +821,19 @@ def process_files(
     weight_emotion, weight_break, weight_learn, weight_viral
 ):
     if not xml_file:
-        return "Envie o XML", None, f"LLM: {LLM_AVAILABLE}"
     try:
-        # Lê transcrição apenas se necessário
         transcript = ""
         manual = parse_manual_timecodes(manual_timecodes)
         if not manual and txt_file:
             with open(txt_file.name, "r", encoding="utf-8-sig") as f:
                 transcript = f.read()
-        # Seleciona segmentos
         segments = select_segments(
             transcript, use_llm and LLM_AVAILABLE, num_segments,
             custom_keywords, manual_timecodes, natural_instructions,
@@ -667,93 +841,171 @@ def process_files(
         )
         if not segments:
-            return "Nenhum segmento selecionado", None, f"LLM: {LLM_AVAILABLE}"
-        # Edita XML
         tree = ET.parse(xml_file.name)
         tree = edit_xml(tree, segments)
-        # Salva
         basename = os.path.splitext(os.path.basename(xml_file.name))[0]
         output = os.path.join(OUTPUT_DIR, f"{basename}_EDITADO.xml")
         tree.write(output, encoding="utf-8", xml_declaration=True)
-        # Resumo
         total_sec = sum((s.end_f - s.start_f) / FPS for s in segments)
         total_min = total_sec / 60.0
-        mode = "MANUAL" if manual else ("IA/NATURAL" if natural_instructions.strip() else "AUTOMÁTICO")
-        summary_lines = [f"{len(segments)} corte(s) | {total_min:.1f} min total | Modo: {mode}"]
         for i, seg in enumerate(segments, 1):
             dur_sec = (seg.end_f - seg.start_f) / FPS
-            line = f"{i}. {seg.start_tc} → {seg.end_tc} ({dur_sec/60:.1f} min)"
-            if seg.text and len(seg.text) > 50:
-                line += f"\n   {seg.text[:120]}..."
             summary_lines.append(line)
         summary = "\n".join(summary_lines)
-        status = f"Sucesso | {mode} | {total_min:.1f} min | LLM: {LLM_AVAILABLE}"
         return summary, output, status
     except Exception as e:
         import traceback
-        traceback.print_exc()
-        return f"Erro: {str(e)}", None, f"LLM: {LLM_AVAILABLE}"
 # =========================
-# Interface (Gradio)
 # =========================
 with gr.Blocks(theme=gr.themes.Soft(), title="Editor XML Premiere") as demo:
-    gr.Markdown("# Editor XML Premiere - IA")
-    gr.Markdown("Cortes com transcrição, minutagens ou comando em linguagem natural.")
     with gr.Row():
-        xml_in = gr.File(label="XML do Premiere", file_types=[".xml"])
-        txt_in = gr.File(label="Transcrição (.txt) - opcional", file_types=[".txt"])
     with gr.Row():
-        use_llm = gr.Checkbox(label="Usar IA (Gemini) quando útil", value=USE_LLM_DEFAULT and LLM_AVAILABLE)
-        num_segments = gr.Slider(2, 20, 5, 1, label="Segmentos (modo automático)")
-    with gr.Accordion("Comando em linguagem natural", open=True):
         gr.Markdown("""
-Exemplos:
-- "Crie 1 corte de 10 minutos começando da parte do tenista"
-- "Quero 3 cortes de 30s sobre Maria e José"
-- "Faça 2 cortes de 45s começando em 00:02:10:00"
-Se não fornecer transcrição, os cortes serão contínuos a partir do timecode indicado (ou 00:00:00:00).
         """)
         natural_instructions = gr.Textbox(
             label="Seu comando",
-            placeholder='Ex: "Crie 2 cortes de 45s sobre coragem e disciplina, começando em 00:01:00:00"',
-            lines=2
         )
-    with gr.Accordion("Minutagens manuais", open=False):
         manual_timecodes = gr.Textbox(
-            label="Timecodes (um por linha)",
-            placeholder="00:21:18:09 - 00:31:18:09",
-            lines=3
         )
-    with gr.Accordion("Modo automático (com transcrição)", open=False):
-        custom_keywords = gr.Textbox(label="Palavras-chave (separadas por vírgula)")
         with gr.Row():
-            weight_emotion = gr.Slider(0, 5, 2.0, 0.1, label="Peso: emoção")
-            weight_break = gr.Slider(0, 5, 1.5, 0.1, label="Peso: quebra")
         with gr.Row():
-            weight_learn = gr.Slider(0, 5, 1.2, 0.1, label="Peso: aprendizado")
-            weight_viral = gr.Slider(0, 5, 1.0, 0.1, label="Peso: viral")
-    btn = gr.Button("Processar", variant="primary", size="lg")
     with gr.Row():
         with gr.Column(scale=2):
-            summary_out = gr.Textbox(label="Resumo", lines=12)
         with gr.Column(scale=1):
-            status_out = gr.Textbox(label="Status")
-            file_out = gr.File(label="Download")
     btn.click(
         process_files,
@@ -762,6 +1014,19 @@ Se não fornecer transcrição, os cortes serão contínuos a partir do timecode
          weight_emotion, weight_break, weight_learn, weight_viral],
         [summary_out, file_out, status_out]
     )
 if __name__ == "__main__":
-    demo.launch()

 # Funções de Timecode
 # =========================
 def _tc_to_hmsf(tc: str, fps: int = FPS) -> Tuple[int, int, int, int]:
+    """Converte timecode para (hh, mm, ss, ff)."""
     s = tc.strip()
     # HH:MM:SS:FF ou HH:MM:SS;FF
 # Parser de Transcrição
 # =========================
 def parse_transcript(txt: str) -> List[Segment]:
+    """Parser robusto para múltiplos formatos de transcrição."""
     if not txt or not txt.strip():
         return []
             i += 1
             continue
+        # Formato com traço
         m = line_range.match(raw)
         if m:
             start_tc, end_tc, trailing_text = m.groups()
             if trailing_text.strip():
                 text_parts.append(trailing_text.strip())
             else:
                 j = i + 1
                 while j < len(lines):
                     nxt = lines[j].strip()
                         break
                     if line_range.match(nxt):
                         break
+                    if re.match(r'^\d+\s*$', nxt):
                         break
+                    if arrow.search(nxt):
                         break
                     text_parts.append(nxt)
                     j += 1
             i += 1
             continue
+        # Formato SRT/VTT
         if arrow.search(raw) or (i + 1 < len(lines) and arrow.search(lines[i + 1])):
             line_with_tc = raw if arrow.search(raw) else lines[i + 1]
             mm = arrow.search(line_with_tc)
             if mm:
                     nxt = lines[j].strip()
                     if not nxt:
                         break
                     if re.match(r'^\d+\s*$', nxt) and (j + 1 < len(lines) and arrow.search(lines[j + 1])):
                         break
                     if arrow.search(nxt):
                 except Exception:
                     pass
                 i = j + 1
                 continue
 # =========================
# Command interpretation (optimized NLP)
# =========================
@dataclass
class CommandSpec:
    """Structured result of parsing a free-text editing command."""

    total_segments: int                 # number of cuts requested (always >= 1)
    per_segment_seconds: Optional[int]  # length of each cut in seconds, if stated
    total_minutes: Optional[float]      # overall length in minutes, if stated
    start_timecode: Optional[str]       # explicit start timecode, as typed
    end_timecode: Optional[str]         # explicit end timecode, as typed
    keywords: List[str]                 # terms used to locate the starting point
    use_best_moments: bool              # True when highlights were requested
    search_mode: str                    # 'continuous', 'keyword' or 'best_moments'


def parse_natural_command(text: str) -> CommandSpec:
    """Extract cut count, durations, timecodes and keywords from a command.

    The command is lower-cased and matched against Portuguese (plus a few
    English) phrasings. Anything that is not recognised keeps its default:
    one cut, no durations, no timecodes, no keywords.

    Args:
        text: Free-text instruction, e.g. ``"3 cortes de 30s sobre Maria"``.

    Returns:
        A ``CommandSpec`` describing what was understood. ``search_mode`` is
        ``'best_moments'`` when highlights were requested, ``'keyword'`` when
        keywords were found, otherwise ``'continuous'``.
    """
    s = text.strip().lower()

    def first_group(patterns):
        """Return group 1 of the first pattern that matches ``s``, else None."""
        for pattern in patterns:
            m = re.search(pattern, s)
            if m:
                return m.group(1)
        return None

    # A number right after a verb is a cut count only when it is not a
    # measurement: "faça 5 minutos" asks for five minutes, not five cuts.
    # The digit/colon/decimal guards also stop the regex from backtracking
    # into a prefix of "30", "00:01:00" or "1,5".
    not_a_measure = (
        r'(?![\d:]|[.,]\d|\s*(?:segundos?|seg\b|s\b|minutos?|min\b|horas?|h\b))'
    )
    # Minutes may be written with a decimal point or a decimal comma.
    number = r'(\d+(?:[.,]\d+)?)'

    def to_float(raw: str) -> float:
        return float(raw.replace(',', '.'))

    # Quantity
    count = 1
    raw_count = first_group((
        r'(\d+)\s*(?:cortes?|clipes?|segmentos?|trechos?|partes?)',
        r'(?:crie?|faça?|faca|gere?|monte?|extraia?)\s+(\d+)' + not_a_measure,
        r'quero\s+(\d+)' + not_a_measure,
        r'preciso\s+(?:de\s+)?(\d+)' + not_a_measure,
    ))
    if raw_count is not None:
        count = max(1, int(raw_count))

    # Per-cut duration, seconds first and minutes as a fallback
    per_seg_sec = None
    raw_sec = first_group((
        r'(?:cortes?|clipes?|trechos?)\s+de\s+(\d+)\s*(?:segundos?|s\b)',
        r'(\d+)\s*(?:segundos?|s\b)\s+(?:cada|por)',
        r'(?:duração|duracao)\s+(?:de\s+)?(\d+)\s*s\b',
        r'com\s+(\d+)\s*segundos?',
    ))
    if raw_sec is not None:
        per_seg_sec = int(raw_sec)
    else:
        raw_min = first_group((
            r'(?:cortes?|clipes?|trechos?)\s+de\s+' + number + r'\s*(?:minutos?|min\b)',
            number + r'\s*(?:minutos?|min\b)\s+(?:cada|por)',
            r'(?:duração|duracao)\s+(?:de\s+)?' + number + r'\s*min',
            r'com\s+' + number + r'\s*minutos?',
        ))
        if raw_min is not None:
            per_seg_sec = int(to_float(raw_min) * 60)

    # Total duration
    total_min = None
    raw_total = first_group((
        r'(?:corte|video|vídeo)\s+(?:de|com)\s+' + number + r'\s*(?:minutos?|min\b)',
        r'(?:totalizando|total\s+de)\s+' + number + r'\s*min',
        r'(?:faça|faca|crie)\s+' + number + r'\s*minutos?',
        number + r'\s*minutos?\s+no\s+total',
    ))
    if raw_total is not None:
        total_min = to_float(raw_total)

    # Start / end timecodes
    start_tc = first_group((
        r'(?:começando|comecando|iniciando|a partir de|desde|starting at|from)\s+(?:em\s+|às\s+|as\s+)?(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)',
        r'(?:do|no)\s+(?:tempo|timecode|tc)\s+(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)',
    ))
    end_tc = first_group((
        r'(?:até|ate|terminando em|até o|finalizando em)\s+(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)',
        r'(?:ao|no)\s+(?:tempo|timecode|tc)\s+(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)',
    ))

    # Keywords
    keyword_patterns = (
        r'(?:sobre|falando sobre|abordando|tratando de|relacionado a)\s+([^,\.]+)',
        r'(?:da parte|trecho|momento|cena)\s+(?:do|da|dos|das)\s+([^,\.]+)',
        r'(?:tema|assunto|tópico|topico|conteúdo|conteudo)\s+([^,\.]+)',
        # "com 30 segundos" is a duration, not a topic, so skip "com <digit>".
        r'\b(?:com(?!\s+\d)|contendo|que menciona?|que fala sobre)\s+([^,\.]+)',
        r'(?:onde|quando|que)\s+(?:fala|menciona|cita|aparece)\s+([^,\.]+)',
    )
    # A start/end clause following the topic ("... começando em 00:01:00")
    # belongs to the timecode, not to the keyword phrase.
    clause_break = re.compile(
        r'(?:^|\s+)(?:começando|comecando|iniciando|a partir de|desde|starting at|from'
        r'|até|ate|terminando|finalizando)\b.*',
        re.S,
    )
    stopwords = {'o', 'a', 'os', 'as', 'de', 'do', 'da', 'dos', 'das', 'em', 'no', 'na'}
    kw: List[str] = []
    for pattern in keyword_patterns:
        m = re.search(pattern, s)
        if not m:
            continue
        phrase = clause_break.sub('', m.group(1))
        phrase = re.sub(r'\s+(?:e|ou|,)\s+', ',', phrase)
        candidates = [k.strip() for k in phrase.split(',') if k.strip()]
        kw = [k for k in candidates if k.lower() not in stopwords]
        if kw:
            break

    if not kw:
        # Fallback: take the words after a bare "sobre/do/da/...". Whole-word
        # matching matters here: "do" must not fire inside "segundos".
        for word in ('sobre', 'do', 'da', 'dos', 'das'):
            m = re.search(r'\b' + word + r'\b(.*)', s, re.S)
            if not m:
                continue
            tail = re.split(
                r'\b(?:começando|comecando|iniciando|de|com|em)\b',
                m.group(1),
                maxsplit=1,
            )[0]
            kw = [w for w in tail.split() if len(w) > 2][:5]
            if kw:
                break

    # Best moments
    best = bool(re.search(r'melhor(?:es)?\s+momento|mais\s+interessante|destaque|highlight', s))

    # Search mode
    if best:
        search_mode = 'best_moments'
    elif kw:
        search_mode = 'keyword'
    else:
        search_mode = 'continuous'

    return CommandSpec(
        total_segments=count,
        per_segment_seconds=per_seg_sec,
        total_minutes=total_min,
        start_timecode=start_tc,
        end_timecode=end_tc,
        keywords=kw,
        use_best_moments=best,
        search_mode=search_mode,
    )
 # =========================
+# Utilidades (melhoradas)
 # =========================
def find_keyword_in_segments(segs: List["Segment"], keywords: List[str]) -> Tuple[int, float]:
    """Return ``(index, score)`` of the segment that best matches ``keywords``.

    Scoring per segment, case-insensitive:
      * +5 per word of every keyword phrase found verbatim in the text;
      * +1 for every (keyword word, text word) pair where one contains the
        other, ignoring words of two characters or fewer on either side.

    Args:
        segs: Segments to search; only their ``text`` attribute is read.
        keywords: Keyword phrases to look for.

    Returns:
        The index of the first segment with the highest score and that score.
        ``(0, 0.0)`` when either list is empty or nothing matches.
    """
    if not segs or not keywords:
        return 0, 0.0

    kw_lower = [kw.lower() for kw in keywords]
    # Individual keyword words worth fuzzy-matching (short ones are noise).
    kw_terms = [term for kw in kw_lower for term in kw.split() if len(term) > 2]

    best_idx, best_score = 0, 0.0
    for idx, seg in enumerate(segs):
        # A segment may carry no text at all; treat it as empty.
        text_lower = (seg.text or "").lower()
        score = 0.0
        for kw in kw_lower:
            if kw in text_lower:
                score += len(kw.split()) * 5.0
        words = text_lower.split()
        for term in kw_terms:
            for word in words:
                # The reverse test lets "tenis" match keyword "tenista", but
                # must skip tiny words: "a" is a substring of almost anything.
                if term in word or (len(word) > 2 and word in term):
                    score += 1.0
        if score > best_score:
            best_idx, best_score = idx, score
    return best_idx, best_score
def find_llm_segment(segs: List["Segment"], keywords: List[str], command: str) -> Tuple[Optional[int], float]:
    """Ask the LLM which segment starts the requested content.

    A compact listing of at most the first 100 segments is sent along with
    the user's command and keywords; the reply is scanned for an index.

    Args:
        segs: Transcript segments (``start_tc``, ``start_f``, ``end_f`` and
            ``text`` are read).
        keywords: Keywords extracted from the command; up to 10 are sent.
        command: The user's original instruction, included verbatim.

    Returns:
        ``(index, confidence)``. Confidence is 0.9 when the reply is a bare
        number and 0.7 when the number had to be dug out of surrounding text.
        ``(None, 0.0)`` when there are no segments, the LLM is unavailable,
        the call raises, or the reply holds no index within the listing.
    """
    # Cheap local check first: nothing to search without segments.
    if not segs or not LLM_AVAILABLE:
        return None, 0.0
    try:
        shown = segs[:100]
        preview_lines = []
        for i, s in enumerate(shown):
            text_preview = (s.text or '')[:120]
            duration_sec = (s.end_f - s.start_f) / FPS
            preview_lines.append(f"{i}|{s.start_tc}|{duration_sec:.1f}s|{text_preview}")
        preview_text = "\n".join(preview_lines)
        keywords_str = ", ".join(keywords[:10]) if keywords else "não especificado"
        prompt = f"""Analise os segmentos e retorne APENAS o número do índice onde o conteúdo solicitado começa.
IMPORTANTE: Responda SOMENTE com o número do índice (ex: 42). Não explique.
COMANDO DO USUÁRIO: {command}
PALAVRAS-CHAVE: {keywords_str}
SEGMENTOS (formato: índice|timecode|duração|texto):
{preview_text}
Qual índice melhor corresponde ao início do conteúdo solicitado?
Responda apenas o número:"""
        response = LLM.generate_content(
            prompt,
            generation_config={
                "temperature": 0.1,
                "max_output_tokens": 30,
                "top_p": 0.8
            }
        )
        text = (response.text or "").strip()
        # Ordered from strictest to loosest; only the first counts as a
        # clean answer.
        patterns = [
            r'^\s*(\d+)\s*$',
            r'(?:índice|index|segmento)\s*(\d+)',
            r'(?:número|numero|#)\s*(\d+)',
            r'\b(\d+)\b'
        ]
        for rank, pattern in enumerate(patterns):
            m = re.search(pattern, text, re.IGNORECASE)
            if not m:
                continue
            idx = int(m.group(1))
            # Accept only indices the model was actually shown; anything
            # beyond the listing cannot be a real answer.
            if 0 <= idx < len(shown):
                return idx, (0.9 if rank == 0 else 0.7)
        return None, 0.0
    except Exception as e:
        # Best effort: any failure in the LLM call falls back to "no answer".
        print(f"Erro no LLM: {e}")
        return None, 0.0
 def create_continuous_segment_from(start_frame: int, duration_frames: int, segs_preview: List[Segment]) -> Segment:
     """Build one continuous cut starting at ``start_frame``.

     The cut is always at least one frame long, even when ``duration_frames``
     is zero or negative.

     Args:
         start_frame: First frame of the cut.
         duration_frames: Requested length in frames.
         segs_preview: Transcript segments used only to build a descriptive
             text; may be empty.

     Returns:
         A ``Segment`` with score 100.0. Its text is built from up to 15
         preview segments (each trimmed to 100 characters, joined with
         " [...] " and capped at 400 characters), or a generic label with the
         cut's duration when no usable preview text exists.
     """
     end_frame = max(start_frame + duration_frames, start_frame + 1)
     snippets = [
         seg.text[:100]
         for seg in segs_preview[:15]
         if seg.text and len(seg.text.strip()) > 5
     ]
     combined = " [...] ".join(snippets)[:400]
     # Report the real (clamped) length so the label never says 0.0s or a
     # negative duration for a cut that is actually one frame long.
     label = combined or f"Corte contínuo de {(end_frame - start_frame)/FPS:.1f}s"
     return Segment(
         start_tc=frames_to_timecode(start_frame),
         end_tc=frames_to_timecode(end_frame),
         start_f=start_frame,
         end_f=end_frame,
         text=label,
         score=100.0
     )
def process_with_command(segs: List[Segment], command: str, use_llm: bool) -> List[Segment]:
    """Turn a natural-language instruction into a list of consecutive cuts.

    The command is parsed with ``parse_natural_command``. The start of the
    first cut is then resolved through layered fallbacks, each consulted only
    while the confidence gathered so far is below its threshold: explicit
    start timecode, LLM lookup, keyword lookup. A "best moments" request
    overrides those layers with the highest-scored transcript segment.

    Args:
        segs: Parsed transcript segments; may be empty.
        command: Free-text instruction typed by the user.
        use_llm: Whether the LLM lookup layer may be consulted.

    Returns:
        The cuts produced by ``create_continuous_segment_from``; each cut
        starts at the ``end_f`` of the previous one.
    """
    spec = parse_natural_command(command)

    # --- Duration of each cut and number of cuts ---
    if spec.per_segment_seconds:
        per_seg_seconds = spec.per_segment_seconds
        total_segments = max(1, spec.total_segments)
    elif spec.total_minutes:
        total_seconds = int(spec.total_minutes * 60)
        if spec.total_segments > 1:
            # Split the requested total evenly, never going below 5 s per cut.
            per_seg_seconds = max(5, total_seconds // spec.total_segments)
            total_segments = spec.total_segments
        else:
            per_seg_seconds = total_seconds
            total_segments = 1
    else:
        per_seg_seconds = 60
        total_segments = max(1, spec.total_segments)

    # --- Start of the first cut, resolved with fallbacks ---
    start_frame = 0
    start_idx = None
    search_confidence = 0.0

    # Layer 1: explicit start timecode.
    if spec.start_timecode:
        try:
            parsed_start = parse_timecode_to_frames(spec.start_timecode)
        except Exception:
            # Best effort: an unparseable timecode simply leaves the decision
            # to the search layers below.
            pass
        else:
            start_frame = parsed_start
            search_confidence = 1.0

    # Layer 2: LLM lookup.
    if search_confidence < 0.8 and use_llm and segs and (spec.keywords or spec.search_mode == 'llm'):
        llm_idx, llm_conf = find_llm_segment(segs, spec.keywords, command)
        if llm_idx is not None and llm_conf > search_confidence:
            start_idx = llm_idx
            start_frame = segs[start_idx].start_f
            search_confidence = llm_conf

    # Layer 3: keyword lookup.
    if search_confidence < 0.6 and segs and spec.keywords:
        kw_idx, kw_score = find_keyword_in_segments(segs, spec.keywords)
        kw_conf = min(0.9, kw_score / 10.0)
        # Same None guard as the LLM layer, so a "not found" index is never
        # used to subscript the transcript.
        if kw_idx is not None and kw_conf > search_confidence:
            start_idx = kw_idx
            start_frame = segs[start_idx].start_f
            search_confidence = kw_conf

    # Best moments: start at the highest-scored segment (first one on ties).
    if spec.use_best_moments and segs:
        best_idx = max(
            (i for i, s in enumerate(segs) if s.score > 0),
            key=lambda i: segs[i].score,
            default=None,
        )
        if best_idx is not None:
            start_idx = best_idx
            start_frame = segs[start_idx].start_f

    # --- Optional explicit end ---
    end_frame = None
    if spec.end_timecode:
        try:
            end_frame = parse_timecode_to_frames(spec.end_timecode)
        except Exception:
            # Best effort: without a valid end the fixed-duration path is used.
            pass

    segments_out: List[Segment] = []

    # --- Explicit interval: cover start_frame..end_frame ---
    if end_frame is not None and end_frame > start_frame:
        duration_frames = end_frame - start_frame
        if total_segments == 1:
            seg_preview = []
            if segs and start_idx is not None:
                seg_preview = segs[start_idx:start_idx + 20]
            segments_out.append(
                create_continuous_segment_from(start_frame, duration_frames, seg_preview)
            )
            return segments_out

        frames_per_seg = duration_frames // total_segments
        base = start_frame
        for i in range(total_segments):
            seg_preview = []
            if segs and start_idx is not None:
                seg_preview = segs[start_idx + i:start_idx + i + 10]
            length = frames_per_seg
            # The integer division above drops the remainder; give it to the
            # last cut so the interval ends on the requested end timecode.
            if i == total_segments - 1 and end_frame > base:
                length = end_frame - base
            seg = create_continuous_segment_from(base, length, seg_preview)
            segments_out.append(seg)
            base = seg.end_f
        return segments_out

    # --- Sequential fixed-duration cuts ---
    duration_frames = int(per_seg_seconds * FPS)
    search_from = start_idx if start_idx is not None else 0
    base_frame = start_frame
    for _ in range(total_segments):
        # With an empty transcript the preview is simply empty.
        seg_preview = _preview_for_window(
            segs, search_from, base_frame, base_frame + duration_frames
        )
        seg = create_continuous_segment_from(base_frame, duration_frames, seg_preview)
        segments_out.append(seg)
        base_frame = seg.end_f
    return segments_out


def _preview_for_window(
    segs: List[Segment], search_from: int, window_start: int, window_end: int
) -> List[Segment]:
    """Pick the transcript segments used as text preview for one cut.

    Starting at index ``search_from``, locate the first segment whose
    ``start_f`` is at or after ``window_start`` and return the consecutive run
    from there whose ``start_f`` is before ``window_end``.
    """
    for idx in range(search_from, len(segs)):
        if segs[idx].start_f >= window_start:
            first_idx = idx
            break
    else:
        return []

    preview: List[Segment] = []
    for s in segs[first_idx:]:
        if s.start_f >= window_end:
            break
        preview.append(s)
    return preview
 # =========================
+# Modo Automático
 # =========================
 def auto_score_segments(
     segs: List[Segment],
     weight_learn: float,
     weight_viral: float
 ) -> List[Segment]:
+    """Sistema de pontuação expandido."""
+    emotion_words = ['medo', 'coragem', 'amor', 'ódio', 'paixão', 'alegria', 'tristeza',
+                     'ansiedade', 'felicidade', 'emoção', 'sentimento', 'coração']
+    break_words = ['nunca', 'de repente', 'surpreendente', 'inesperado', 'incrível',
+                   'chocante', 'virada', 'mudança', 'momento', 'aconteceu']
+    learn_words = ['aprendi', 'descobri', 'entendi', 'percebi', 'compreendi', 'lição',
+                   'ensinamento', 'experiência', 'conhecimento', 'insight']
+    viral_words = ['segredo', 'verdade', 'ninguém sabe', 'revelação', 'exclusivo',
+                   'primeira vez', 'confissão', 'polêmica', 'controverso']
     for s in segs:
         score = 0.0
         text = (s.text or "").lower()
+        for word in emotion_words:
+            if word in text:
+                score += weight_emotion
+        for word in break_words:
+            if word in text:
+                score += weight_break
+        for word in learn_words:
+            if word in text:
+                score += weight_learn
+        for word in viral_words:
+            if word in text:
+                score += weight_viral
         if custom_keywords:
             for kw in custom_keywords.split(","):
+                kw_clean = kw.strip().lower()
+                if kw_clean and kw_clean in text:
+                    score += 3.0 * len(kw_clean.split())
+        duration_sec = (s.end_f - s.start_f) / FPS
+        if 10 <= duration_sec <= 120:
+            score += 0.5
+        if len(text) > 100:
+            score += 0.3
         s.score = score
     segs.sort(key=lambda x: x.score, reverse=True)
     return segs[:num_segments]
 # =========================
+# Edição de XML
 # =========================
 def deep_copy_element(elem: ET.Element) -> ET.Element:
     new = ET.Element(elem.tag, attrib=dict(elem.attrib))
     v_template = v_track.find("./clipitem")
     a_template = a_track.find("./clipitem")
     for clip in list(v_track.findall("./clipitem")):
         v_track.remove(clip)
     for clip in list(a_track.findall("./clipitem")):
         a_track.remove(clip)
     timeline_pos = 0
     for i, seg in enumerate(segs, 1):
         duration = seg.end_f - seg.start_f
                 pass
         return result
+    # 2) Parser de transcrição
     segs = parse_transcript(transcript_txt) if transcript_txt else []
+    # 3) Linguagem natural
     if natural_instructions.strip():
         return process_with_command(segs, natural_instructions, use_llm and LLM_AVAILABLE)
     weight_emotion, weight_break, weight_learn, weight_viral
 ):
     if not xml_file:
+        return "⚠️ Envie o XML do Premiere", None, f"LLM: {'✓' if LLM_AVAILABLE else '✗'}"
     try:
+        debug_info = []
         transcript = ""
         manual = parse_manual_timecodes(manual_timecodes)
         if not manual and txt_file:
             with open(txt_file.name, "r", encoding="utf-8-sig") as f:
                 transcript = f.read()
+            debug_info.append(f"📄 Transcrição carregada: {len(transcript)} caracteres")
         segments = select_segments(
             transcript, use_llm and LLM_AVAILABLE, num_segments,
             custom_keywords, manual_timecodes, natural_instructions,
         )
         if not segments:
+            return "⚠️ Nenhum segmento selecionado. Verifique os parâmetros.", None, f"LLM: {'✓' if LLM_AVAILABLE else '✗'}"
+        valid_segments = []
+        for seg in segments:
+            if seg.end_f > seg.start_f and seg.end_f - seg.start_f >= FPS:
+                valid_segments.append(seg)
+        if not valid_segments:
+            return "⚠️ Segmentos inválidos (duração muito curta). Ajuste os parâmetros.", None, f"LLM: {'✓' if LLM_AVAILABLE else '✗'}"
+        segments = valid_segments
+        debug_info.append(f"✓ {len(segments)} segmento(s) válido(s)")
         tree = ET.parse(xml_file.name)
         tree = edit_xml(tree, segments)
         basename = os.path.splitext(os.path.basename(xml_file.name))[0]
         output = os.path.join(OUTPUT_DIR, f"{basename}_EDITADO.xml")
         tree.write(output, encoding="utf-8", xml_declaration=True)
         total_sec = sum((s.end_f - s.start_f) / FPS for s in segments)
         total_min = total_sec / 60.0
+        if manual:
+            mode = "🎯 MANUAL"
+        elif natural_instructions.strip():
+            spec = parse_natural_command(natural_instructions)
+            if spec.keywords:
+                mode = f"🤖 IA + BUSCA ({', '.join(spec.keywords[:3])})"
+            else:
+                mode = "📐 IA + CONTÍNUO"
+        else:
+            mode = "⚙️ AUTOMÁTICO"
+        summary_lines = [
+            "═" * 60,
+            f"✨ RESULTADO: {len(segments)} corte(s) | {total_min:.1f} min total",
+            f"📊 Modo: {mode}",
+            "═" * 60,
+            ""
+        ]
         for i, seg in enumerate(segments, 1):
             dur_sec = (seg.end_f - seg.start_f) / FPS
+            dur_min = dur_sec / 60.0
+            line = f"🎬 Corte {i}:"
+            line += f"\n   ⏱️  {seg.start_tc} → {seg.end_tc} ({dur_min:.2f} min)"
+            if seg.text and len(seg.text.strip()) > 10:
+                text_preview = seg.text[:150].strip()
+                if len(seg.text) > 150:
+                    text_preview += "..."
+                line += f"\n   💬 {text_preview}"
+            if seg.score > 0:
+                line += f"\n   ⭐ Score: {seg.score:.1f}"
             summary_lines.append(line)
+            summary_lines.append("")
+        if debug_info:
+            summary_lines.append("═" * 60)
+            summary_lines.append("🔍 Debug:")
+            summary_lines.extend(f"   {info}" for info in debug_info)
         summary = "\n".join(summary_lines)
+        status = f"✅ Sucesso | {mode} | {total_min:.1f} min | LLM: {'✓' if LLM_AVAILABLE else '✗'}"
         return summary, output, status
     except Exception as e:
         import traceback
+        error_trace = traceback.format_exc()
+        print(error_trace)
+        error_msg = f"❌ Erro: {str(e)}\n\n🔍 Detalhes técnicos:\n{error_trace[:500]}"
+        return error_msg, None, f"LLM: {'✓' if LLM_AVAILABLE else '✗'}"
 # =========================
+# Interface Gradio
 # =========================
 with gr.Blocks(theme=gr.themes.Soft(), title="Editor XML Premiere") as demo:
+    gr.Markdown("# 🎬 Editor XML Premiere - IA Avançada")
+    gr.Markdown("Sistema inteligente de cortes com IA (Gemini), busca por keywords e timecodes manuais.")
+    status_inicial = f"{'🟢 IA Disponível (Gemini 2.0)' if LLM_AVAILABLE else '🟡 Modo básico (IA desabilitada - configure GEMINI_API_KEY)'}"
+    gr.Markdown(f"**Status:** {status_inicial}")
     with gr.Row():
+        xml_in = gr.File(label="📄 XML do Premiere", file_types=[".xml"])
+        txt_in = gr.File(label="📝 Transcrição (.txt) - opcional", file_types=[".txt"])
     with gr.Row():
+        use_llm = gr.Checkbox(
+            label="🤖 Usar IA (Gemini) para busca inteligente",
+            value=USE_LLM_DEFAULT and LLM_AVAILABLE,
+            interactive=LLM_AVAILABLE
+        )
+        num_segments = gr.Slider(2, 20, 5, 1, label="📊 Segmentos (modo automático)")
+    with gr.Accordion("💬 Comando em linguagem natural (RECOMENDADO)", open=True):
         gr.Markdown("""
+**Exemplos de comandos suportados:**
+📌 **Duração e quantidade:**
+- "Crie 3 cortes de 30 segundos"
+- "Faça 1 corte de 10 minutos"
+- "Quero 5 clipes de 45s cada"
+📍 **Com timecode:**
+- "2 cortes de 1min começando em 00:02:10:00"
+- "Corte de 5 minutos a partir de 00:05:00:00"
+🔍 **Com busca de conteúdo (requer transcrição + IA):**
+- "3 cortes de 30s sobre Maria e José"
+- "1 corte de 10 minutos da parte do tenista"
+- "2 clipes de 45s falando sobre coragem"
+- "Corte sobre disciplina começando em 00:02:00"
+🎯 **Intervalo específico:**
+- "Corte de 00:10:00:00 até 00:15:00:00"
+- "3 segmentos começando em 00:02:00 até 00:05:00"
+💡 **Dicas:**
+- Com transcrição + IA: busca automática do conteúdo
+- Sem transcrição: cortes contínuos a partir do timecode
+- Seja específico nas durações e palavras-chave
         """)
         natural_instructions = gr.Textbox(
             label="Seu comando",
+            placeholder='Ex: "Crie 2 cortes de 45s sobre disciplina, começando em 00:01:00:00"',
+            lines=3
         )
+    with gr.Accordion("🎯 Minutagens manuais (alta precisão)", open=False):
+        gr.Markdown("Use este modo quando souber exatamente os timecodes. Um por linha ou separados por vírgula.")
         manual_timecodes = gr.Textbox(
+            label="Timecodes (formato: HH:MM:SS:FF - HH:MM:SS:FF)",
+            placeholder="00:21:18:09 - 00:31:18:09\n00:45:20:15 - 00:50:10:22",
+            lines=4
         )
+    with gr.Accordion("⚙️ Modo automático (com transcrição)", open=False):
+        gr.Markdown("Sistema de pontuação automática baseado em palavras-chave e pesos.")
+        custom_keywords = gr.Textbox(
+            label="Palavras-chave personalizadas (separadas por vírgula)",
+            placeholder="coragem, superação, vitória"
+        )
         with gr.Row():
+            weight_emotion = gr.Slider(0, 5, 2.0, 0.1, label="⚡ Peso: emoção")
+            weight_break = gr.Slider(0, 5, 1.5, 0.1, label="💥 Peso: quebra")
         with gr.Row():
+            weight_learn = gr.Slider(0, 5, 1.2, 0.1, label="🎓 Peso: aprendizado")
+            weight_viral = gr.Slider(0, 5, 1.0, 0.1, label="🔥 Peso: viral")
+    btn = gr.Button("🚀 Processar e Gerar XML", variant="primary", size="lg")
     with gr.Row():
         with gr.Column(scale=2):
+            summary_out = gr.Textbox(label="📋 Resumo dos Cortes", lines=15, max_lines=25)
         with gr.Column(scale=1):
+            status_out = gr.Textbox(label="📊 Status", lines=3)
+            file_out = gr.File(label="⬇️ Download XML Editado")
     btn.click(
         process_files,
          weight_emotion, weight_break, weight_learn, weight_viral],
         [summary_out, file_out, status_out]
     )
+    gr.Markdown("""
+---
+### 📚 Como usar:
+1. **Envie o XML** exportado do Premiere (File > Export > Final Cut Pro XML)
+2. **Opcional:** Envie transcrição para buscas inteligentes
+3. **Escolha um modo:**
+   - 💬 Linguagem natural (mais fácil)
+   - 🎯 Minutagens manuais (mais preciso)
+   - ⚙️ Automático (experimental)
+4. Clique em **Processar** e faça download do XML editado
+5. Importe de volta no Premiere (File > Import)
+    """)
 if __name__ == "__main__":
+    demo.launch()