Spaces:

leicam
/

EditorAutomaticoXML

Running

App Files Files Community

leicam committed on Oct 7, 2025

Commit

8769bc9

verified ·

1 Parent(s): ae976b8

Update app.py

Browse files

Files changed (1) hide show

app.py +547 -304

app.py CHANGED Viewed

@@ -5,7 +5,16 @@ from dataclasses import dataclass
 from typing import List, Tuple, Optional
 import gradio as gr
-# LLM Configuration
 USE_LLM_DEFAULT = True
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
@@ -23,11 +32,10 @@ except Exception:
     LLM = None
     LLM_AVAILABLE = False
-# Config
-FPS = 24
-OUTPUT_DIR = "./Output"
-os.makedirs(OUTPUT_DIR, exist_ok=True)
 @dataclass
 class Segment:
     start_tc: str
@@ -37,272 +45,452 @@ class Segment:
     text: str
     score: float
-# ============ TIMECODE FUNCTIONS ============
 def parse_timecode_to_frames(tc: str, fps: int = FPS) -> int:
-    tc = tc.strip()
-    m = re.match(r'^(\d{2}):(\d{2}):(\d{2})[:;](\d{2})$', tc)
-    if not m:
-        raise ValueError(f"Timecode inválido: {tc}")
-    hh, mm, ss, ff = map(int, m.groups())
-    return hh*3600*fps + mm*60*fps + ss*fps + ff
 def frames_to_timecode(frames: int, fps: int = FPS) -> str:
-    hh = frames // (3600*fps)
-    rem = frames % (3600*fps)
-    mm = rem // (60*fps)
-    rem = rem % (60*fps)
     ss = rem // fps
     ff = rem % fps
     return f"{hh:02d}:{mm:02d}:{ss:02d}:{ff:02d}"
-# ============ TRANSCRIPT PARSING ============
 def parse_transcript(txt: str) -> List[Segment]:
-    """Parse transcrição - aceita vários formatos"""
     if not txt or not txt.strip():
-        print("⚠️ Transcrição vazia")
         return []
-    lines = txt.strip().splitlines()
-    results = []
-    # Regex flexível
-    pattern = re.compile(
-        r'^\s*\[?\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*[-—–]\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*\]?\s*(.*)$',
-        re.IGNORECASE
     )
-    for idx, line in enumerate(lines):
-        line = line.strip()
-        if not line or line.lower() == "desconhecido":
             continue
-        match = pattern.match(line)
-        if match:
-            start_tc, end_tc, text = match.groups()
-            text = text.strip()
-            if not text or text.lower() == "desconhecido":
-                continue
             try:
-                start_f = parse_timecode_to_frames(start_tc)
-                end_f = parse_timecode_to_frames(end_tc)
-                if end_f > start_f:
                     results.append(Segment(
-                        start_tc=start_tc,
-                        end_tc=end_tc,
-                        start_f=start_f,
-                        end_f=end_f,
-                        text=text,
                         score=0.0
                     ))
-            except Exception as e:
-                print(f"⚠️ Erro linha {idx}: {str(e)}")
                 continue
-    print(f"✅ {len(results)} segmentos encontrados")
     return results
-# ============ MANUAL TIMECODES ============
 def parse_manual_timecodes(manual_input: str) -> List[Tuple[str, str]]:
     if not manual_input or not manual_input.strip():
         return []
     manual_ranges = []
     lines = manual_input.replace(",", "\n").splitlines()
-    pattern = re.compile(r'(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*[-–—]\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})')
     for line in lines:
         m = pattern.search(line.strip())
         if m:
             manual_ranges.append((m.group(1), m.group(2)))
     return manual_ranges
-# ============ AI HELPERS ============
-def extract_duration_minutes(text: str) -> Optional[float]:
-    """Extrai duração em minutos"""
-    text_lower = text.lower()
-    patterns = [
-        r'(\d+)\s*minutos?',
-        r'(\d+)\s*min\b',
-        r'(\d+)m\b',
-        r'corte\s+de\s+(\d+)'
-    ]
-    for pattern in patterns:
-        match = re.search(pattern, text_lower)
-        if match:
-            return float(match.group(1))
-    return None
 def find_keyword_in_segments(segs: List[Segment], keywords: List[str]) -> int:
-    """Busca simples por palavras-chave"""
-    if not keywords:
         return 0
-    best_idx = 0
-    best_score = 0
     for idx, seg in enumerate(segs):
         text_lower = seg.text.lower()
         score = sum(1 for kw in keywords if kw.lower() in text_lower)
         if score > best_score:
-            best_score = score
-            best_idx = idx
     return best_idx
-def create_continuous_segment(segs: List[Segment], start_idx: int, duration_min: float) -> Segment:
-    """Cria um segmento contínuo"""
-    if start_idx >= len(segs):
-        start_idx = 0
-    start_seg = segs[start_idx]
-    start_frame = start_seg.start_f
-    duration_frames = int(duration_min * 60 * FPS)
-    end_frame = start_frame + duration_frames
-    # Pega texto dos primeiros segmentos
     text_parts = []
-    for seg in segs[start_idx:min(start_idx+10, len(segs))]:
-        text_parts.append(seg.text[:80])
-    combined_text = " ".join(text_parts)[:300]
     return Segment(
         start_tc=frames_to_timecode(start_frame),
         end_tc=frames_to_timecode(end_frame),
         start_f=start_frame,
         end_f=end_frame,
-        text=f"Corte contínuo ({duration_min}min): {combined_text}",
         score=100.0
     )
-def process_with_ai(segs: List[Segment], instructions: str) -> List[Segment]:
-    """Processa com IA"""
-    # Extrai duração
-    duration = extract_duration_minutes(instructions)
-    # Identifica palavras-chave importantes
-    keywords = []
-    text_lower = instructions.lower()
-    keyword_map = {
-        'tenista': ['tenista', 'tênis', 'tenis', 'jogador', 'kinguios'],
-        'maria': ['maria', 'josé', 'jose', 'casal', 'seguro'],
-        'protocolo': ['protocolo', 'rodar', 'dependência', 'dependencia'],
-    }
-    for key, terms in keyword_map.items():
-        if any(term in text_lower for term in terms):
-            keywords.extend(terms)
-    print(f"📊 Duração: {duration}min | Keywords: {keywords[:3]}")
-    # Encontra ponto de início
-    start_idx = 0
-    if LLM_AVAILABLE and keywords:
-        try:
-            # Cria preview dos segmentos
-            preview = []
-            for i, s in enumerate(segs[:100]):
-                preview.append(f"{i}|{s.start_tc}|{s.text[:60]}")
-            preview_text = "\n".join(preview[:80])
-            prompt = f"""Encontre o índice onde começa o assunto solicitado.
-BUSCAR: {' '.join(keywords[:3])}
-SEGMENTOS (formato: índice|timecode|texto):
-{preview_text}
-Retorne APENAS o número do índice (exemplo: 42)"""
-            response = LLM.generate_content(
-                prompt,
-                generation_config={"temperature": 0.1, "max_output_tokens": 20}
-            )
-            text = (response.text or "").strip()
-            match = re.search(r'\b(\d+)\b', text)
-            if match:
-                idx = int(match.group(1))
-                if 0 <= idx < len(segs):
-                    start_idx = idx
-                    print(f"✅ IA encontrou: segmento {start_idx} ({segs[start_idx].start_tc})")
-        except Exception as e:
-            print(f"⚠️ IA falhou: {e}")
-    # Fallback: busca por keywords
-    if start_idx == 0 and keywords:
-        start_idx = find_keyword_in_segments(segs, keywords)
-        print(f"✅ Busca por keyword: segmento {start_idx} ({segs[start_idx].start_tc})")
-    # Cria corte
-    if duration:
-        result = create_continuous_segment(segs, start_idx, duration)
-        print(f"✅ Corte: {result.start_tc} → {result.end_tc} ({duration}min)")
-        return [result]
     else:
-        # Sem duração: retorna múltiplos segmentos
-        return segs[start_idx:start_idx+10]
-# ============ MAIN SELECTION ============
-def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
-                   custom_keywords: str, manual_timecodes: str, natural_instructions: str,
-                   weight_emotion: float, weight_break: float,
-                   weight_learn: float, weight_viral: float) -> List[Segment]:
-    # Prioridade 1: Manual
-    manual = parse_manual_timecodes(manual_timecodes)
-    if manual:
-        print(f"🔧 Modo MANUAL: {len(manual)} cortes")
-        result = []
-        for start_tc, end_tc in manual:
             try:
-                result.append(Segment(
-                    start_tc=start_tc,
-                    end_tc=end_tc,
-                    start_f=parse_timecode_to_frames(start_tc),
-                    end_f=parse_timecode_to_frames(end_tc),
-                    text=f"Manual: {start_tc}-{end_tc}",
-                    score=100.0
-                ))
-            except:
                 pass
-        return result
-    # Parse transcrição
-    segs = parse_transcript(transcript_txt)
     if not segs:
-        raise ValueError("❌ Nenhum segmento encontrado. Formato esperado: 00:00:00:00 - 00:00:10:00 Texto")
-    # Prioridade 2: IA com linguagem natural
-    if natural_instructions.strip() and use_llm:
-        print("🤖 Modo IA")
-        return process_with_ai(segs, natural_instructions)
-    # Prioridade 3: Automático por score
-    print("⚙️ Modo AUTOMÁTICO")
     for s in segs:
-        score = 0
-        text = s.text.lower()
         if "medo" in text or "coragem" in text:
             score += weight_emotion
         if "nunca" in text or "de repente" in text:
@@ -311,18 +499,21 @@ def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
             score += weight_learn
         if "segredo" in text or "verdade" in text:
             score += weight_viral
         if custom_keywords:
             for kw in custom_keywords.split(","):
                 if kw.strip().lower() in text:
                     score += 3.0
         s.score = score
     segs.sort(key=lambda x: x.score, reverse=True)
     return segs[:num_segments]
-# ============ XML EDITING ============
 def deep_copy_element(elem: ET.Element) -> ET.Element:
     new = ET.Element(elem.tag, attrib=dict(elem.attrib))
     new.text = elem.text
@@ -331,42 +522,42 @@ def deep_copy_element(elem: ET.Element) -> ET.Element:
         new.append(deep_copy_element(child))
     return new
 def edit_xml(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
     root = tree.getroot()
     seq = root.find(".//sequence")
     if seq is None:
         raise ValueError("Sequence não encontrada no XML")
     v_track = seq.find(".//media/video/track")
     a_track = seq.find(".//media/audio/track")
     if not v_track or not a_track:
         raise ValueError("Trilhas de vídeo/áudio não encontradas")
     v_template = v_track.find("./clipitem")
     a_template = a_track.find("./clipitem")
     # Limpa clips existentes
     for clip in list(v_track.findall("./clipitem")):
         v_track.remove(clip)
     for clip in list(a_track.findall("./clipitem")):
         a_track.remove(clip)
     # Adiciona novos clips
     timeline_pos = 0
     for i, seg in enumerate(segs, 1):
         duration = seg.end_f - seg.start_f
-        # Vídeo clip
         v_clip = ET.Element("clipitem", {"id": f"clip-v{i}"})
         ET.SubElement(v_clip, "name").text = f"Clip {i}"
         ET.SubElement(v_clip, "start").text = str(timeline_pos)
         ET.SubElement(v_clip, "end").text = str(timeline_pos + duration)
         ET.SubElement(v_clip, "in").text = str(seg.start_f)
         ET.SubElement(v_clip, "out").text = str(seg.end_f)
         if v_template is not None:
             rate = v_template.find("rate")
             if rate is not None:
@@ -374,15 +565,15 @@ def edit_xml(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
             file_elem = v_template.find("file")
             if file_elem is not None:
                 v_clip.append(deep_copy_element(file_elem))
-        # Áudio clip
         a_clip = ET.Element("clipitem", {"id": f"clip-a{i}"})
         ET.SubElement(a_clip, "name").text = f"Clip {i}"
         ET.SubElement(a_clip, "start").text = str(timeline_pos)
         ET.SubElement(a_clip, "end").text = str(timeline_pos + duration)
         ET.SubElement(a_clip, "in").text = str(seg.start_f)
         ET.SubElement(a_clip, "out").text = str(seg.end_f)
         if a_template is not None:
             rate = a_template.find("rate")
             if rate is not None:
@@ -390,128 +581,180 @@ def edit_xml(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
             file_elem = a_template.find("file")
             if file_elem is not None:
                 a_clip.append(deep_copy_element(file_elem))
         v_track.append(v_clip)
         a_track.append(a_clip)
         timeline_pos += duration
     return tree
-# ============ GRADIO ============
-def process_files(xml_file, txt_file, use_llm, num_segments,
-                 custom_keywords, manual_timecodes, natural_instructions,
-                 weight_emotion, weight_break, weight_learn, weight_viral):
     if not xml_file:
-        return "❌ Envie o XML", None, f"LLM: {LLM_AVAILABLE}"
     try:
-        # Lê transcrição se necessário
         transcript = ""
         manual = parse_manual_timecodes(manual_timecodes)
-        if not manual:
-            if not txt_file:
-                return "❌ Envie a transcrição (.txt)", None, f"LLM: {LLM_AVAILABLE}"
-            with open(txt_file.name, "r", encoding="utf-8") as f:
                 transcript = f.read()
         # Seleciona segmentos
         segments = select_segments(
             transcript, use_llm and LLM_AVAILABLE, num_segments,
             custom_keywords, manual_timecodes, natural_instructions,
             weight_emotion, weight_break, weight_learn, weight_viral
         )
         if not segments:
-            return "❌ Nenhum segmento selecionado", None, f"LLM: {LLM_AVAILABLE}"
         # Edita XML
         tree = ET.parse(xml_file.name)
         tree = edit_xml(tree, segments)
         # Salva
         basename = os.path.splitext(os.path.basename(xml_file.name))[0]
         output = os.path.join(OUTPUT_DIR, f"{basename}_EDITADO.xml")
         tree.write(output, encoding="utf-8", xml_declaration=True)
         # Resumo
         total_sec = sum((s.end_f - s.start_f) / FPS for s in segments)
-        total_min = total_sec / 60
-        mode = "MANUAL" if manual else ("IA" if natural_instructions.strip() else "AUTOMÁTICO")
-        summary = f"✅ {len(segments)} corte(s) | {total_min:.1f} min total | Modo: {mode}\n\n"
         for i, seg in enumerate(segments, 1):
             dur_sec = (seg.end_f - seg.start_f) / FPS
-            summary += f"{i}. {seg.start_tc} → {seg.end_tc} ({dur_sec/60:.1f}min)\n"
             if seg.text and len(seg.text) > 50:
-                summary += f"   {seg.text[:120]}...\n"
-            summary += "\n"
-        status = f"✅ Sucesso | {mode} | {total_min:.1f}min | LLM: {LLM_AVAILABLE}"
         return summary, output, status
     except Exception as e:
         import traceback
         traceback.print_exc()
-        return f"❌ Erro: {str(e)}", None, f"LLM: {LLM_AVAILABLE}"
-# ============ UI ============
 with gr.Blocks(theme=gr.themes.Soft(), title="Editor XML Premiere") as demo:
-    gr.Markdown("# 🎬 Editor XML Premiere - IA")
-    gr.Markdown("Cortes inteligentes com linguagem natural")
     with gr.Row():
-        xml_in = gr.File(label="📁 XML do Premiere", file_types=[".xml"])
-        txt_in = gr.File(label="📄 Transcrição (.txt)", file_types=[".txt"])
     with gr.Row():
-        use_llm = gr.Checkbox(label="🤖 Usar IA", value=USE_LLM_DEFAULT and LLM_AVAILABLE)
-        num_segments = gr.Slider(2, 20, 5, 1, label="Segmentos (automático)")
-    with gr.Accordion("💬 IA - Linguagem Natural", open=True):
         gr.Markdown("""
-**Exemplos:**
-- `Extraia um corte de 10 minutos começando da parte do tenista`
-- `Crie 15 minutos com os melhores momentos`
-- `5 minutos sobre Maria e José`
         """)
         natural_instructions = gr.Textbox(
-            label="Suas instruções",
-            placeholder='Ex: "10 minutos começando da parte do tenista"',
             lines=2
         )
-    with gr.Accordion("⏱️ Minutagens Manuais", open=False):
         manual_timecodes = gr.Textbox(
             label="Timecodes (um por linha)",
             placeholder="00:21:18:09 - 00:31:18:09",
             lines=3
         )
-    with gr.Accordion("⚙️ Modo Automático", open=False):
-        custom_keywords = gr.Textbox(label="Palavras-chave")
         with gr.Row():
-            weight_emotion = gr.Slider(0, 5, 2.0, 0.1, label="Emoção")
-            weight_break = gr.Slider(0, 5, 1.5, 0.1, label="Quebra")
         with gr.Row():
-            weight_learn = gr.Slider(0, 5, 1.2, 0.1, label="Aprendizado")
-            weight_viral = gr.Slider(0, 5, 1.0, 0.1, label="Viral")
-    btn = gr.Button("🚀 Processar", variant="primary", size="lg")
     with gr.Row():
         with gr.Column(scale=2):
-            summary_out = gr.Textbox(label="📊 Resumo", lines=12)
         with gr.Column(scale=1):
             status_out = gr.Textbox(label="Status")
-            file_out = gr.File(label="⬇️ Download")
     btn.click(
         process_files,
         [xml_in, txt_in, use_llm, num_segments, custom_keywords,
@@ -521,4 +764,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Editor XML Premiere") as demo:
     )
 if __name__ == "__main__":
-    demo.launch()

 from typing import List, Tuple, Optional
 import gradio as gr
+# =========================
+# Configurações Gerais
+# =========================
+FPS = 24
+OUTPUT_DIR = "./Output"
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+# =========================
+# LLM (opcional - Gemini)
+# =========================
 USE_LLM_DEFAULT = True
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
     LLM = None
     LLM_AVAILABLE = False
+# =========================
+# Modelos
+# =========================
 @dataclass
 class Segment:
     start_tc: str
     text: str
     score: float
+# =========================
+# Funções de Timecode
+# =========================
def _tc_to_hmsf(tc: str, fps: int = FPS) -> Tuple[int, int, int, int]:
    """Split a timecode string into ``(hours, minutes, seconds, frames)``.

    Accepted layouts:
      - ``HH:MM:SS:FF`` or ``HH:MM:SS;FF`` (explicit frame count)
      - ``HH:MM:SS.fff`` or ``HH:MM:SS,fff`` (fractional seconds, 1-3 digits)
      - ``H:MM:SS`` (no frame part; frames are reported as 0)

    Args:
        tc: Timecode text; surrounding whitespace is ignored.
        fps: Frame rate used to turn fractional seconds into a frame count.

    Returns:
        A ``(hh, mm, ss, ff)`` tuple of ints. When rounding the fraction
        reaches a whole second, ``ss`` is incremented and ``ff`` reset to 0;
        the carry is not propagated into minutes, so ``ss`` can be 60.

    Raises:
        ValueError: If ``tc`` matches none of the accepted layouts.
    """
    s = tc.strip()

    # HH:MM:SS:FF or HH:MM:SS;FF
    m = re.match(r'^(\d{1,2}):(\d{2}):(\d{2})[:;](\d{2})$', s)
    if m:
        hh, mm, ss, ff = map(int, m.groups())
        return hh, mm, ss, ff

    # HH:MM:SS.fff or HH:MM:SS,fff
    m = re.match(r'^(\d{1,2}):(\d{2}):(\d{2})[.,](\d{1,3})$', s)
    if m:
        hh, mm, ss = (int(g) for g in m.group(1, 2, 3))
        # The digits are a decimal fraction of a second, so ".5" means 500 ms
        # (not 5 ms): right-pad to three digits before reading milliseconds.
        ms = int(m.group(4).ljust(3, "0"))
        ff = int(round((ms / 1000.0) * fps))
        if ff >= fps:
            # Rounding landed on the next whole second.
            ss += 1
            ff = 0
        return hh, mm, ss, ff

    # H:MM:SS
    m = re.match(r'^(\d{1,2}):(\d{2}):(\d{2})$', s)
    if m:
        hh, mm, ss = map(int, m.groups())
        return hh, mm, ss, 0

    raise ValueError(f"Timecode inválido: {tc}")
 def parse_timecode_to_frames(tc: str, fps: int = FPS) -> int:
+    hh, mm, ss, ff = _tc_to_hmsf(tc, fps)
+    return hh * 3600 * fps + mm * 60 * fps + ss * fps + ff
 def frames_to_timecode(frames: int, fps: int = FPS) -> str:
+    hh = frames // (3600 * fps)
+    rem = frames % (3600 * fps)
+    mm = rem // (60 * fps)
+    rem = rem % (60 * fps)
     ss = rem // fps
     ff = rem % fps
     return f"{hh:02d}:{mm:02d}:{ss:02d}:{ff:02d}"
+# =========================
+# Parser de Transcrição
+# =========================
 def parse_transcript(txt: str) -> List[Segment]:
+    """
+    Aceita múltiplos formatos:
+    A) Uma linha:  00:00:00:00 - 00:00:10:00 Texto...
+    B) Duas linhas: 00:00:00:00 - 00:00:10:00 \n Texto...
+    C) SRT/VTT com setas:
+       1
+       00:00:05,120 --> 00:00:08,300
+       Texto linha 1
+       Texto linha 2
+       [linha em branco]
+    """
     if not txt or not txt.strip():
         return []
+    lines = [l.rstrip() for l in txt.splitlines()]
+    results: List[Segment] = []
+    line_range = re.compile(
+        r'^\s*\[?\s*(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*[-—–]\s*'
+        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*\]?\s*(.*)$'
     )
+    arrow = re.compile(
+        r'(\d{1,2}:\d{2}:\d{2}(?:[.,]\d{1,3}|[:;]\d{2})?)\s*-->\s*'
+        r'(\d{1,2}:\d{2}:\d{2}(?:[.,]\d{1,3}|[:;]\d{2})?)'
+    )
+    i = 0
+    while i < len(lines):
+        raw = lines[i].strip()
+        if not raw or raw.lower() == "desconhecido":
+            i += 1
             continue
+        # Casos A e B (com traço)
+        m = line_range.match(raw)
+        if m:
+            start_tc, end_tc, trailing_text = m.groups()
+            text_parts = []
+            if trailing_text.strip():
+                text_parts.append(trailing_text.strip())
+            else:
+                # Texto nas linhas seguintes até linha em branco ou novo bloco
+                j = i + 1
+                while j < len(lines):
+                    nxt = lines[j].strip()
+                    if not nxt:
+                        break
+                    if line_range.match(nxt):
+                        break
+                    if re.match(r'^\d+\s*$', nxt):  # índice SRT
+                        break
+                    if arrow.search(nxt):          # linha SRT com -->
+                        break
+                    text_parts.append(nxt)
+                    j += 1
+                i = j - 1
+            text = " ".join(text_parts).strip()
             try:
+                sf = parse_timecode_to_frames(start_tc)
+                ef = parse_timecode_to_frames(end_tc)
+                if ef > sf:
                     results.append(Segment(
+                        start_tc=frames_to_timecode(sf),
+                        end_tc=frames_to_timecode(ef),
+                        start_f=sf,
+                        end_f=ef,
+                        text=text if text else f"{start_tc} - {end_tc}",
                         score=0.0
                     ))
+            except Exception:
+                pass
+            i += 1
+            continue
+        # Caso C (SRT/VTT com -->)
+        if arrow.search(raw) or (i + 1 < len(lines) and arrow.search(lines[i + 1])):
+            # Se a linha atual não tem arrow, tente a próxima (muitos SRTs têm um índice numérico antes)
+            line_with_tc = raw if arrow.search(raw) else lines[i + 1]
+            mm = arrow.search(line_with_tc)
+            if mm:
+                start_tc, end_tc = mm.groups()
+                j = i + 1 if line_with_tc == raw else i + 2
+                text_parts = []
+                while j < len(lines):
+                    nxt = lines[j].strip()
+                    if not nxt:
+                        break
+                    # próximo bloco: índice seguido de timecode
+                    if re.match(r'^\d+\s*$', nxt) and (j + 1 < len(lines) and arrow.search(lines[j + 1])):
+                        break
+                    if arrow.search(nxt):
+                        break
+                    text_parts.append(nxt)
+                    j += 1
+                text = " ".join(text_parts).strip()
+                try:
+                    sf = parse_timecode_to_frames(start_tc)
+                    ef = parse_timecode_to_frames(end_tc)
+                    if ef > sf:
+                        results.append(Segment(
+                            start_tc=frames_to_timecode(sf),
+                            end_tc=frames_to_timecode(ef),
+                            start_f=sf,
+                            end_f=ef,
+                            text=text,
+                            score=0.0
+                        ))
+                except Exception:
+                    pass
+                # Avança o ponteiro para depois do bloco
+                i = j + 1
                 continue
+        i += 1
     return results
+# =========================
+# Minutagens Manuais
+# =========================
 def parse_manual_timecodes(manual_input: str) -> List[Tuple[str, str]]:
     if not manual_input or not manual_input.strip():
         return []
     manual_ranges = []
     lines = manual_input.replace(",", "\n").splitlines()
+    pattern = re.compile(r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*[-–—]\s*(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)')
     for line in lines:
         m = pattern.search(line.strip())
         if m:
             manual_ranges.append((m.group(1), m.group(2)))
     return manual_ranges
+# =========================
+# Interpretação do Comando (NLP simples)
+# =========================
@dataclass
class CommandSpec:
    """Structured result of parsing a free-text cutting instruction."""

    total_segments: int  # number of cuts requested (always >= 1)
    per_segment_seconds: Optional[int]  # length of each cut in seconds, if stated
    total_minutes: Optional[float]  # overall length in minutes, if stated
    start_timecode: Optional[str]  # explicit starting timecode, if stated
    keywords: List[str]  # terms used to locate where the cut should begin
    use_best_moments: bool  # True when "melhores momentos" appears in the text


def parse_natural_command(text: str) -> CommandSpec:
    """Extract cutting parameters from a free-text (Portuguese) instruction.

    Recognised pieces (matching is done on the lower-cased text):
      - number of cuts: "3 cortes", "2 clipes", "crie 2"
      - duration in seconds: "cortes de 30s", "45 segundos"
      - duration in minutes: "clipes de 1min", "corte de 10 minutos"
      - start timecode: "começando em 00:02:10:00", "a partir de 00:02:10,500"
      - keywords: whatever follows "sobre", "da parte do", "tema" or
        "assunto", split on ``, . ; /``
      - best moments: the phrase "melhores momentos"

    NOTE: a minutes value fills BOTH ``per_segment_seconds`` (when no seconds
    value was found) and ``total_minutes``; callers decide which one wins.

    Args:
        text: The instruction as typed by the user.

    Returns:
        A ``CommandSpec``; fields that were not mentioned are ``None``, an
        empty list, ``False`` or (for ``total_segments``) 1.
    """
    s = text.strip().lower()

    # Number of cuts. "crie N" only counts when N is not itself a duration:
    # "crie 15 minutos" asks for 15 minutes, not for 15 cuts.
    count = 1
    m = re.search(r'(\d+)\s*(?:cortes?|clipes?)\b', s)
    if not m:
        m = re.search(r'\bcrie\s+(\d+)\b(?!\s*(?:minutos?|min|segundos?|s)\b)', s)
    if m:
        count = max(1, int(m.group(1)))

    # Per-cut duration: seconds take precedence, minutes are the fallback.
    per_seg_sec = None
    m = re.search(r'(\d+)\s*(?:segundos?|s)\b', s)
    if m:
        per_seg_sec = int(m.group(1))
    else:
        m = re.search(r'(\d+)\s*(?:minutos?|min)\b', s)
        if m:
            per_seg_sec = int(m.group(1)) * 60

    # Total duration (minutes): prefer the number that follows an action verb
    # ("corte de 10 minutos", "faça 5 minutos", "crie 15min").
    total_min = None
    m = re.search(r'\b(?:corte|faça|faca|crie|criar|gerar|make|montar)\b.*?(\d+)\s*(?:minutos?|min)\b', s)
    if not m:
        m = re.search(r'\b(\d+)\s*(?:minutos?|min)\b', s)
    if m:
        total_min = float(m.group(1))

    # Explicit start timecode; a linking preposition ("começando em ...") may
    # sit between the trigger word and the timecode.
    m = re.search(
        r'(?:começando|comecando|a partir d[eo]|starting at|start at)\s*'
        r'(?:(?:em|de|do|no)\s+)?'
        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)',
        s
    )
    start_tc = m.group(1) if m else None

    # Keywords: the rest of the sentence after a topic marker. The leading \b
    # keeps "tema" from matching inside words such as "sistema".
    kw = []
    kw_match = re.search(r'\b(?:sobre|da parte do|tema|assunto)\s+(.+)', s)
    if kw_match:
        tail = kw_match.group(1)
        kw = [t.strip() for t in re.split(r'[,\.;/]', tail) if t.strip()]

    best = bool(re.search(r'melhores momentos', s))

    return CommandSpec(
        total_segments=count,
        per_segment_seconds=per_seg_sec,
        total_minutes=total_min,
        start_timecode=start_tc,
        keywords=kw,
        use_best_moments=best
    )
+# =========================
+# Utilidades de seleção
+# =========================
 def find_keyword_in_segments(segs: List[Segment], keywords: List[str]) -> int:
+    if not segs or not keywords:
         return 0
+    best_idx, best_score = 0, -1
     for idx, seg in enumerate(segs):
         text_lower = seg.text.lower()
         score = sum(1 for kw in keywords if kw.lower() in text_lower)
         if score > best_score:
+            best_idx, best_score = idx, score
     return best_idx
def create_continuous_segment_from(start_frame: int, duration_frames: int, segs_preview: List[Segment]) -> Segment:
    """Build one continuous cut starting at ``start_frame``.

    The cut always spans at least one frame. Its text is a label followed by
    a short preview assembled from up to ten of the given segments (80
    characters each, 300 in total); segments without text are ignored.
    """
    # Guarantee a positive length even for a zero or negative duration.
    end_frame = start_frame + max(duration_frames, 1)

    snippets = (seg.text[:80] for seg in segs_preview[:10] if seg.text)
    combined = " ".join(snippets)[:300]
    if combined:
        label = "Corte contínuo: " + combined
    else:
        label = "Corte contínuo"

    return Segment(
        start_tc=frames_to_timecode(start_frame),
        end_tc=frames_to_timecode(end_frame),
        start_f=start_frame,
        end_f=end_frame,
        text=label,
        score=100.0
    )
def process_with_command(
    segs: List[Segment],
    command: str,
    use_llm: bool
) -> List[Segment]:
    """Turn a natural-language instruction into back-to-back continuous cuts.

    Works with or without a transcript:
      - without one, cuts start at the explicit timecode (or frame 0);
      - with one, keywords (optionally refined by the LLM) pick the segment
        where the first cut begins.

    Duration rules, in order:
      - ``per_segment_seconds`` given -> used for every cut;
      - else ``total_minutes`` with more than one cut -> split evenly;
      - else ``total_minutes`` alone -> a single cut of that length;
      - else -> cuts of 60 seconds.

    NOTE(review): ``parse_natural_command`` fills ``per_segment_seconds``
    whenever it finds a minutes value, so the two ``total_minutes`` branches
    look unreachable for now — confirm the intended precedence.

    Args:
        segs: Parsed transcript segments; may be empty.
        command: The user's instruction text.
        use_llm: Whether the LLM may be consulted (also requires
            ``LLM_AVAILABLE``).

    Returns:
        The cuts, each starting where the previous one ends.
    """
    spec = parse_natural_command(command)

    # Duration of each cut (seconds) and number of cuts.
    if spec.per_segment_seconds:
        per_seg_seconds = spec.per_segment_seconds
        total_segments = max(1, spec.total_segments)
    elif spec.total_minutes and spec.total_segments and spec.total_segments > 1:
        total_seconds = int(spec.total_minutes * 60)
        total_segments = spec.total_segments
        per_seg_seconds = max(1, total_seconds // total_segments)
    elif spec.total_minutes:
        per_seg_seconds = int(spec.total_minutes * 60)
        total_segments = 1
    else:
        per_seg_seconds = 60
        total_segments = max(1, spec.total_segments)

    # Explicit start timecode, if any.
    start_frame = 0
    if spec.start_timecode:
        try:
            start_frame = parse_timecode_to_frames(spec.start_timecode)
        except ValueError:
            start_frame = 0

    # Transcript-based start. Only searched when there is something to search
    # for: asking the LLM with no keywords yields an arbitrary index that
    # would silently override the explicit start timecode.
    start_idx = None
    if segs and spec.keywords:
        start_idx = find_keyword_in_segments(segs, spec.keywords)
        if use_llm and LLM_AVAILABLE:
            try:
                # Light preview of the first 80 segments (index|tc|text).
                preview_text = "\n".join(
                    f"{i}|{s.start_tc}|{(s.text or '')[:60]}"
                    for i, s in enumerate(segs[:80])
                )
                prompt = f"""Encontre o índice inicial do assunto solicitado, retornando apenas o número (ex: 42).
BUSCAR: {' '.join(spec.keywords[:5]) or '(sem keywords)'}
SEGMENTOS (índice|timecode|texto):
{preview_text}
"""
                response = LLM.generate_content(
                    prompt,
                    generation_config={"temperature": 0.1, "max_output_tokens": 20}
                )
                text = (response.text or "").strip()
                m = re.search(r'\b(\d+)\b', text)
                if m:
                    idx = int(m.group(1))
                    if 0 <= idx < len(segs):
                        start_idx = idx
            except Exception:
                # Best effort: any LLM failure falls back to the keyword hit.
                pass

    # A transcript hit takes over as the starting point.
    if start_idx is not None and 0 <= start_idx < len(segs):
        start_frame = segs[start_idx].start_f

    # Text preview used only for the description of each cut.
    if not segs:
        seg_preview = []
    elif start_idx is not None:
        seg_preview = segs[start_idx:start_idx + 10]
    else:
        seg_preview = segs[:10]

    duration_frames = int(per_seg_seconds * FPS)
    base_frame = max(0, start_frame)
    segments_out: List[Segment] = []
    for _ in range(total_segments):
        seg = create_continuous_segment_from(base_frame, duration_frames, seg_preview)
        segments_out.append(seg)
        base_frame = seg.end_f  # next cut starts where this one ends
    return segments_out
+# =========================
+# Modo Automático (score simples)
+# =========================
def auto_score_segments(
    segs: List[Segment],
    num_segments: int,
    custom_keywords: str,
    weight_emotion: float,
    weight_break: float,
    weight_learn: float,
    weight_viral: float
) -> List[Segment]:
    """Score segments with simple keyword heuristics and return the best ones.

    Each segment's ``score`` attribute is overwritten with the sum of:

    * ``weight_emotion`` if the text contains "medo" or "coragem";
    * ``weight_learn`` if the text contains "nunca" or "de repente";
    * ``weight_viral`` if the text contains "segredo" or "verdade";
    * ``3.0`` for every non-empty custom keyword found in the text.

    Args:
        segs: Segments to score; only ``text`` is read and ``score`` written.
        num_segments: Maximum number of segments to return (coerced to int,
            so a float coming from a UI slider is accepted).
        custom_keywords: Comma-separated keywords; blank entries are ignored.
        weight_emotion: Bonus for the emotion markers.
        weight_break: Accepted for interface compatibility; no rule in this
            function uses it.
        weight_learn: Bonus for the "nunca"/"de repente" markers.
        weight_viral: Bonus for the "segredo"/"verdade" markers.

    Returns:
        A new list with the highest scoring segments, best first. Ties keep
        their original relative order and the input list is not reordered.
    """
    # Normalise the keywords once. Dropping blank entries matters because
    # ``"" in text`` is always True and would reward every segment.
    keywords = [
        kw.strip().lower()
        for kw in (custom_keywords or "").split(",")
        if kw.strip()
    ]

    for s in segs:
        score = 0.0
        text = (s.text or "").lower()
        if "medo" in text or "coragem" in text:
            score += weight_emotion
        if "nunca" in text or "de repente" in text:
            score += weight_learn
        if "segredo" in text or "verdade" in text:
            score += weight_viral
        for kw in keywords:
            if kw in text:
                score += 3.0
        s.score = score

    # ``sorted`` is stable and leaves the caller's list order untouched.
    ranked = sorted(segs, key=lambda x: x.score, reverse=True)
    return ranked[:max(0, int(num_segments))]
+# =========================
+# Edição de XML (Premiere)
+# =========================
 def deep_copy_element(elem: ET.Element) -> ET.Element:
     new = ET.Element(elem.tag, attrib=dict(elem.attrib))
     new.text = elem.text
         new.append(deep_copy_element(child))
     return new
 def edit_xml(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
     root = tree.getroot()
     seq = root.find(".//sequence")
     if seq is None:
         raise ValueError("Sequence não encontrada no XML")
     v_track = seq.find(".//media/video/track")
     a_track = seq.find(".//media/audio/track")
     if not v_track or not a_track:
         raise ValueError("Trilhas de vídeo/áudio não encontradas")
     v_template = v_track.find("./clipitem")
     a_template = a_track.find("./clipitem")
     # Limpa clips existentes
     for clip in list(v_track.findall("./clipitem")):
         v_track.remove(clip)
     for clip in list(a_track.findall("./clipitem")):
         a_track.remove(clip)
     # Adiciona novos clips
     timeline_pos = 0
     for i, seg in enumerate(segs, 1):
         duration = seg.end_f - seg.start_f
+        if duration <= 0:
+            continue
+        # Vídeo
         v_clip = ET.Element("clipitem", {"id": f"clip-v{i}"})
         ET.SubElement(v_clip, "name").text = f"Clip {i}"
         ET.SubElement(v_clip, "start").text = str(timeline_pos)
         ET.SubElement(v_clip, "end").text = str(timeline_pos + duration)
         ET.SubElement(v_clip, "in").text = str(seg.start_f)
         ET.SubElement(v_clip, "out").text = str(seg.end_f)
         if v_template is not None:
             rate = v_template.find("rate")
             if rate is not None:
             file_elem = v_template.find("file")
             if file_elem is not None:
                 v_clip.append(deep_copy_element(file_elem))
+        # Áudio
         a_clip = ET.Element("clipitem", {"id": f"clip-a{i}"})
         ET.SubElement(a_clip, "name").text = f"Clip {i}"
         ET.SubElement(a_clip, "start").text = str(timeline_pos)
         ET.SubElement(a_clip, "end").text = str(timeline_pos + duration)
         ET.SubElement(a_clip, "in").text = str(seg.start_f)
         ET.SubElement(a_clip, "out").text = str(seg.end_f)
         if a_template is not None:
             rate = a_template.find("rate")
             if rate is not None:
             file_elem = a_template.find("file")
             if file_elem is not None:
                 a_clip.append(deep_copy_element(file_elem))
         v_track.append(v_clip)
         a_track.append(a_clip)
         timeline_pos += duration
     return tree
+# =========================
+# Seleção (orquestração)
+# =========================
def select_segments(
    transcript_txt: str,
    use_llm: bool,
    num_segments: int,
    custom_keywords: str,
    manual_timecodes: str,
    natural_instructions: str,
    weight_emotion: float,
    weight_break: float,
    weight_learn: float,
    weight_viral: float
) -> List[Segment]:
    """Choose the segments to cut, trying three strategies in priority order.

    1. Manual timecodes: when ``manual_timecodes`` yields at least one pair,
       only those pairs are used (pairs that fail to convert are skipped).
    2. Natural-language command: when ``natural_instructions`` is not blank,
       it is handed to ``process_with_command`` together with the parsed
       transcript (which may be empty).
    3. Automatic scoring of the parsed transcript via ``auto_score_segments``.

    Returns:
        The selected segments; may be empty in manual mode if every pair
        was invalid.

    Raises:
        ValueError: In automatic mode when the transcript yields no segments.
    """
    # 1) Manual
    manual = parse_manual_timecodes(manual_timecodes)
    if manual:
        result = []
        for start_tc, end_tc in manual:
            try:
                # Parse each timecode once and reuse the frame counts for
                # both the numeric fields and the normalised timecode text.
                start_f = parse_timecode_to_frames(start_tc)
                end_f = parse_timecode_to_frames(end_tc)
                result.append(Segment(
                    start_tc=frames_to_timecode(start_f),
                    end_tc=frames_to_timecode(end_f),
                    start_f=start_f,
                    end_f=end_f,
                    text=f"Manual: {start_tc} - {end_tc}",
                    score=100.0
                ))
            except Exception:
                # Deliberate best effort: an invalid pair is dropped so the
                # remaining valid pairs can still be used.
                continue
        return result

    # 2) Transcript parsing (if any)
    segs = parse_transcript(transcript_txt) if transcript_txt else []

    # 3) Natural language (works with or without a transcript).
    # ``or ""`` guards against a None value coming from the caller.
    if (natural_instructions or "").strip():
        return process_with_command(segs, natural_instructions, use_llm and LLM_AVAILABLE)

    # 4) Automatic
    if not segs:
        raise ValueError("Nenhum segmento encontrado. Forneça uma transcrição, minutagens ou um comando em linguagem natural.")
    return auto_score_segments(
        segs, num_segments, custom_keywords,
        weight_emotion, weight_break, weight_learn, weight_viral
    )
+# =========================
+# Pipeline principal
+# =========================
def _uploaded_path(upload):
    """Return the filesystem path of an upload.

    Accepts either a path (``str`` / ``os.PathLike``) or an object exposing
    the path through a ``name`` attribute, such as a temporary-file wrapper.
    """
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    return upload.name


def process_files(
    xml_file, txt_file, use_llm, num_segments,
    custom_keywords, manual_timecodes, natural_instructions,
    weight_emotion, weight_break, weight_learn, weight_viral
):
    """Run the full pipeline: select segments, rewrite the XML, summarise.

    Args:
        xml_file: Uploaded XML (path or object with ``.name``); required.
        txt_file: Optional uploaded transcript; only read when no manual
            timecodes were supplied.
        use_llm: Whether the LLM may be used (also requires LLM_AVAILABLE).
        num_segments, custom_keywords, weight_*: Automatic-mode settings,
            forwarded to ``select_segments``.
        manual_timecodes: Text with manual timecode pairs.
        natural_instructions: Natural-language command; may be empty/None.

    Returns:
        A ``(summary, output_path, status)`` tuple. ``output_path`` is None
        when nothing was written; errors are reported in ``summary`` instead
        of being raised.
    """
    llm_status = f"LLM: {LLM_AVAILABLE}"
    if not xml_file:
        return "Envie o XML", None, llm_status

    # Treat a missing command as blank so ``.strip()`` below cannot fail.
    natural_instructions = natural_instructions or ""

    try:
        # Read the transcript only when it is actually needed.
        transcript = ""
        manual = parse_manual_timecodes(manual_timecodes)
        if not manual and txt_file:
            with open(_uploaded_path(txt_file), "r", encoding="utf-8-sig") as f:
                transcript = f.read()

        # Select segments
        segments = select_segments(
            transcript, use_llm and LLM_AVAILABLE, num_segments,
            custom_keywords, manual_timecodes, natural_instructions,
            weight_emotion, weight_break, weight_learn, weight_viral
        )
        if not segments:
            return "Nenhum segmento selecionado", None, llm_status

        # Edit the XML
        xml_path = _uploaded_path(xml_file)
        tree = ET.parse(xml_path)
        tree = edit_xml(tree, segments)

        # Save; make sure the destination directory exists first.
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        basename = os.path.splitext(os.path.basename(xml_path))[0]
        output = os.path.join(OUTPUT_DIR, f"{basename}_EDITADO.xml")
        tree.write(output, encoding="utf-8", xml_declaration=True)

        # Summary
        total_sec = sum((s.end_f - s.start_f) / FPS for s in segments)
        total_min = total_sec / 60.0
        mode = "MANUAL" if manual else ("IA/NATURAL" if natural_instructions.strip() else "AUTOMÁTICO")
        summary_lines = [f"{len(segments)} corte(s) | {total_min:.1f} min total | Modo: {mode}"]
        for i, seg in enumerate(segments, 1):
            dur_sec = (seg.end_f - seg.start_f) / FPS
            line = f"{i}. {seg.start_tc} → {seg.end_tc} ({dur_sec/60:.1f} min)"
            # Only longer texts get a preview line under the clip entry.
            if seg.text and len(seg.text) > 50:
                line += f"\n   {seg.text[:120]}..."
            summary_lines.append(line)
        summary = "\n".join(summary_lines)
        status = f"Sucesso | {mode} | {total_min:.1f} min | LLM: {LLM_AVAILABLE}"
        return summary, output, status
    except Exception as e:
        # Top-level UI boundary: log the traceback and show the message to
        # the user instead of crashing the request.
        import traceback
        traceback.print_exc()
        return f"Erro: {str(e)}", None, llm_status
+# =========================
+# Interface (Gradio)
+# =========================
 with gr.Blocks(theme=gr.themes.Soft(), title="Editor XML Premiere") as demo:
+    gr.Markdown("# Editor XML Premiere - IA")
+    gr.Markdown("Cortes com transcrição, minutagens ou comando em linguagem natural.")
     with gr.Row():
+        xml_in = gr.File(label="XML do Premiere", file_types=[".xml"])
+        txt_in = gr.File(label="Transcrição (.txt) - opcional", file_types=[".txt"])
     with gr.Row():
+        use_llm = gr.Checkbox(label="Usar IA (Gemini) quando útil", value=USE_LLM_DEFAULT and LLM_AVAILABLE)
+        num_segments = gr.Slider(2, 20, 5, 1, label="Segmentos (modo automático)")
+    with gr.Accordion("Comando em linguagem natural", open=True):
         gr.Markdown("""
+Exemplos:
+- "Crie 1 corte de 10 minutos começando da parte do tenista"
+- "Quero 3 cortes de 30s sobre Maria e José"
+- "Faça 2 cortes de 45s começando em 00:02:10:00"
+Se não fornecer transcrição, os cortes serão contínuos a partir do timecode indicado (ou 00:00:00:00).
         """)
         natural_instructions = gr.Textbox(
+            label="Seu comando",
+            placeholder='Ex: "Crie 2 cortes de 45s sobre coragem e disciplina, começando em 00:01:00:00"',
             lines=2
         )
+    with gr.Accordion("Minutagens manuais", open=False):
         manual_timecodes = gr.Textbox(
             label="Timecodes (um por linha)",
             placeholder="00:21:18:09 - 00:31:18:09",
             lines=3
         )
+    with gr.Accordion("Modo automático (com transcrição)", open=False):
+        custom_keywords = gr.Textbox(label="Palavras-chave (separadas por vírgula)")
         with gr.Row():
+            weight_emotion = gr.Slider(0, 5, 2.0, 0.1, label="Peso: emoção")
+            weight_break = gr.Slider(0, 5, 1.5, 0.1, label="Peso: quebra")
         with gr.Row():
+            weight_learn = gr.Slider(0, 5, 1.2, 0.1, label="Peso: aprendizado")
+            weight_viral = gr.Slider(0, 5, 1.0, 0.1, label="Peso: viral")
+    btn = gr.Button("Processar", variant="primary", size="lg")
     with gr.Row():
         with gr.Column(scale=2):
+            summary_out = gr.Textbox(label="Resumo", lines=12)
         with gr.Column(scale=1):
             status_out = gr.Textbox(label="Status")
+            file_out = gr.File(label="Download")
     btn.click(
         process_files,
         [xml_in, txt_in, use_llm, num_segments, custom_keywords,
     )
 if __name__ == "__main__":
+    demo.launch()