Spaces:

leicam
/

EditorAutomaticoXML

Sleeping

App Files Files Community

leicam committed on Oct 7, 2025

Commit

603b064

verified ·

1 Parent(s): f248bc7

Update app.py

Browse files

Files changed (1) hide show

app.py +340 -261

app.py CHANGED Viewed

@@ -2,15 +2,15 @@ import os
 import re
 import xml.etree.ElementTree as ET
 from dataclasses import dataclass
-from typing import List
 import gradio as gr
-# Optional LLM (Gemini)
 USE_LLM_DEFAULT = True
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
 LLM_AVAILABLE = False
-LLM_MODEL_NAME = "gemini-2.5-flash"
 try:
     if GEMINI_API_KEY:
         import google.generativeai as genai
@@ -37,7 +37,7 @@ class Segment:
     text: str
     score: float
-# ---- Timecode helpers ----
 def parse_timecode_to_frames(tc: str, fps: int = FPS) -> int:
     m = re.match(r"^\s*(\d{2}):(\d{2}):(\d{2})[:;](\d{2})\s*$", tc)
     if not m:
@@ -54,12 +54,20 @@ def frames_to_timecode(frames: int, fps: int = FPS) -> str:
     ff = rem % fps
     return f"{hh:02d}:{mm:02d}:{ss:02d}:{ff:02d}"
-# ---- Transcript parsing & scoring ----
 def parse_transcript(txt: str) -> List[Segment]:
     lines = [l.strip() for l in txt.splitlines() if l.strip()]
     results: List[Segment] = []
     pat_range = re.compile(r"^\[?\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*[-—]\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*\]?\s+(.*)$")
     pat_point = re.compile(r"^(\d{2}:\d{2}:\d{2}[:;]\d{2})\s+(.*)$")
     for l in lines:
         m = pat_range.match(l)
         if m:
@@ -72,6 +80,7 @@ def parse_transcript(txt: str) -> List[Segment]:
             except Exception:
                 continue
             continue
         m = pat_point.match(l)
         if m:
             s, text = m.groups()
@@ -82,32 +91,11 @@ def parse_transcript(txt: str) -> List[Segment]:
                 results.append(Segment(s, e, s_f, e_f, text, 0.0))
             except Exception:
                 continue
-    return results
-def keyword_score(text: str, custom_keywords: str = "", weight_emotion: float = 2.0,
-                  weight_break: float = 1.5, weight_learn: float = 1.2, weight_viral: float = 1.0) -> float:
-    t = text.lower()
-    kw_emotion = ["medo", "coragem", "raiva", "chorei", "feliz", "triste", "emocion", "culpa", "vergonha", "orgulho"]
-    kw_break   = ["nunca", "de repente", "contraintuitivo", "ninguém te conta", "parei", "decidi", "quebrei", "virada"]
-    kw_learn   = ["aprendi", "descobri", "lição", "entendi", "percebi", "insight", "melhorou", "piorou"]
-    kw_viral   = ["segredo", "verdade", "por trás", "3 passos", "passo a passo", "como eu", "ninguém fala"]
-    score = 0.0
-    for kw in kw_emotion: score += weight_emotion if kw in t else 0.0
-    for kw in kw_break:   score += weight_break if kw in t else 0.0
-    for kw in kw_learn:   score += weight_learn if kw in t else 0.0
-    for kw in kw_viral:   score += weight_viral if kw in t else 0.0
-    if custom_keywords.strip():
-        custom_kw_list = [kw.strip().lower() for kw in custom_keywords.split(",") if kw.strip()]
-        for kw in custom_kw_list:
-            score += 3.0 if kw in t else 0.0
-    score += 0.2 * text.count("!")
-    score += 0.0005 * len(text)
-    return score
-def parse_manual_timecodes(manual_input: str) -> List[tuple]:
     manual_ranges = []
     normalized = manual_input.replace(",", "\n")
     lines = [l.strip() for l in normalized.splitlines() if l.strip()]
@@ -122,90 +110,216 @@ def parse_manual_timecodes(manual_input: str) -> List[tuple]:
     return manual_ranges
-def llm_process_natural_instructions(transcript_txt: str, natural_instructions: str, num_segments: int) -> List[Segment]:
-    if not LLM_AVAILABLE:
-        raise ValueError("LLM não disponível. Configure GEMINI_API_KEY para usar instruções em linguagem natural.")
-    segs = parse_transcript(transcript_txt)
     if not segs:
-        raise ValueError("Nenhum trecho válido encontrado na transcrição.")
-    segments_text = "\n".join([
-        f"{i}. [{s.start_tc} - {s.end_tc}] {s.text}"
-        for i, s in enumerate(segs)
-    ])
-    prompt = f"""Você é um editor de vídeo profissional. Analise a transcrição abaixo e as instruções do usuário.
 INSTRUÇÕES DO USUÁRIO:
-{natural_instructions}
-TRANSCRIÇÃO COM TIMECODES:
-{segments_text}
 TAREFA:
-1. Interprete as instruções do usuário
-2. Selecione os {num_segments} trechos que melhor atendem às instruções
-3. Se a instrução for para REMOVER algo, selecione os trechos que NÃO contêm aquilo
-4. Se a instrução for para INCLUIR algo específico, selecione apenas os trechos que contêm aquilo
-5. Priorize trechos com narrativa coerente e impactantes
-RESPONDA APENAS com os índices dos trechos selecionados, separados por vírgula (ex: 0,3,5,8,12).
-Não adicione explicações, apenas os números."""
     try:
-        response = LLM.generate_content(prompt, generation_config={"temperature": 0.3})
         txt = (response.text or "").strip()
-        idxs = [int(x) for x in re.findall(r"\d+", txt)]
-        idxs = [i for i in idxs if 0 <= i < len(segs)]
-        if not idxs:
-            raise ValueError("LLM não retornou índices válidos")
-        selected = [segs[i] for i in idxs[:num_segments]]
-        selected.sort(key=lambda x: x.start_f)
-        return selected
     except Exception as e:
-        raise ValueError(f"Erro ao processar instruções com LLM: {e}")
-def llm_rank_segments(candidates: List[Segment], num_segments: int, custom_instructions: str = "") -> List[Segment]:
-    if not LLM_AVAILABLE:
-        return candidates[:num_segments]
-    sample = "\n".join([f"{i}. [{c.start_tc}-{c.end_tc}] {c.text[:300]}" for i, c in enumerate(candidates)])
-    base_prompt = (
-        f"Você é um editor profissional. Selecione exatamente {num_segments} trechos mais fortes "
-        "pela emoção, quebra de expectativa e aprendizado, mantendo uma mini-narrativa coerente.\n\n"
-    )
-    if custom_instructions.strip():
-        base_prompt += f"INSTRUÇÕES ADICIONAIS: {custom_instructions}\n\n"
-    base_prompt += "Responda apenas com índices (0-based) separados por vírgula.\n\n" + sample
-    try:
-        r = LLM.generate_content(base_prompt, generation_config={"temperature": 0.2})
-        txt = (r.text or "").strip()
-        idxs = [int(x) for x in re.findall(r"\d+", txt)]
-        idxs = [i for i in idxs if 0 <= i < len(candidates)]
-        if len(idxs) >= num_segments:
-            return [candidates[i] for i in idxs[:num_segments]]
-        elif len(idxs) > 0:
-            return [candidates[i] for i in idxs]
-    except Exception as e:
-        print(f"Erro no LLM: {e}")
-    return candidates[:num_segments]
 def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
                    custom_keywords: str, manual_timecodes: str, natural_instructions: str,
                    weight_emotion: float, weight_break: float,
                    weight_learn: float, weight_viral: float) -> List[Segment]:
     manual_ranges = parse_manual_timecodes(manual_timecodes)
     if manual_ranges:
         result_segs = []
@@ -215,46 +329,42 @@ def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
                 end_f = parse_timecode_to_frames(end_tc)
                 if end_f > start_f:
                     result_segs.append(Segment(
-                        start_tc=start_tc,
-                        end_tc=end_tc,
-                        start_f=start_f,
-                        end_f=end_f,
-                        text=f"Corte manual {start_tc} - {end_tc}",
                         score=100.0
                     ))
             except Exception as e:
-                print(f"Erro ao processar timecode manual {start_tc}-{end_tc}: {e}")
-                continue
-        if not result_segs:
-            raise ValueError("Nenhum timecode manual válido encontrado.")
-        return result_segs
-    if natural_instructions.strip() and use_llm and LLM_AVAILABLE:
-        return llm_process_natural_instructions(transcript_txt, natural_instructions, num_segments)
     segs = parse_transcript(transcript_txt)
     if not segs:
-        raise ValueError("Nenhum trecho válido encontrado na transcrição.")
-    for s in segs:
-        s.score = keyword_score(s.text, custom_keywords, weight_emotion, weight_break, weight_learn, weight_viral)
-    segs.sort(key=lambda x: x.score, reverse=True)
-    top = segs[:min(20, len(segs))]
-    if use_llm and LLM_AVAILABLE:
-        ranked = llm_rank_segments(top, num_segments, "")
-        return ranked
-    return top[:num_segments]
-# ---- XML editing ----
 def get_sequence(root: ET.Element) -> ET.Element:
     seq = root.find(".//sequence")
     if seq is None:
-        raise ValueError("Nenhuma <sequence> encontrada no XML.")
     return seq
 def ensure_rate_24fps(element: ET.Element):
@@ -265,10 +375,6 @@ def ensure_rate_24fps(element: ET.Element):
     if tb is None:
         tb = ET.SubElement(rate, "timebase")
     tb.text = str(FPS)
-    ntsc = rate.find("ntsc")
-    if ntsc is None:
-        ntsc = ET.SubElement(rate, "ntsc")
-    ntsc.text = "FALSE"
 def deep_copy(elem: ET.Element) -> ET.Element:
     new = ET.Element(elem.tag, attrib=elem.attrib)
@@ -278,22 +384,8 @@ def deep_copy(elem: ET.Element) -> ET.Element:
         new.append(deep_copy(child))
     return new
-def clear_clipitems(track_elem: ET.Element):
-    for ci in list(track_elem.findall("./clipitem")):
-        track_elem.remove(ci)
-def first_clipitem_ref(track_elem: ET.Element):
-    return track_elem.find("./clipitem")
-def copy_file_ref(from_clip: ET.Element, to_clip: ET.Element):
-    src_file = from_clip.find("./file")
-    if src_file is not None:
-        old = to_clip.find("./file")
-        if old is not None:
-            to_clip.remove(old)
-        to_clip.append(deep_copy(src_file))
-def build_clipitem(template_ci: ET.Element, cid: str, start_f: int, end_f: int, in_f: int, out_f: int, linked_ids):
     ci = ET.Element("clipitem", {"id": cid})
     name = template_ci.find("name")
     ci_name = ET.SubElement(ci, "name")
@@ -307,204 +399,191 @@ def build_clipitem(template_ci: ET.Element, cid: str, start_f: int, end_f: int,
         t = ET.SubElement(ci, tag)
         t.text = str(val)
-    copy_file_ref(template_ci, ci)
     for lid in linked_ids:
         link = ET.SubElement(ci, "link")
         linkclipref = ET.SubElement(link, "linkclipref")
         linkclipref.text = lid
-        mediatype = ET.SubElement(link, "mediatype")
-        mediatype.text = "video" if "-v" in lid else "audio"
     return ci
 def edit_sequence_with_segments(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
     root = tree.getroot()
     seq = get_sequence(root)
-    ensure_rate_24fps(seq)
     video_track = seq.find("./media/video/track")
     audio_track = seq.find("./media/audio/track")
-    if video_track is None or audio_track is None:
-        raise ValueError("Estrutura de trilhas não encontrada.")
-    v_tpl = first_clipitem_ref(video_track)
-    a_tpl = first_clipitem_ref(audio_track)
-    if v_tpl is None or a_tpl is None:
-        raise ValueError("Não há clipitem de referência em V1 e/ou A1.")
-    clear_clipitems(video_track)
-    clear_clipitems(audio_track)
     cursor = 0
     for idx, s in enumerate(segs, start=1):
         dur = s.end_f - s.start_f
-        start = cursor
-        end   = cursor + dur
-        v_id = f"clipitem-v-cut{idx}"
-        a_id = f"clipitem-a-cut{idx}"
         v_ci = build_clipitem(v_tpl, v_id, start, end, s.start_f, s.end_f, [a_id])
         a_ci = build_clipitem(a_tpl, a_id, start, end, s.start_f, s.end_f, [v_id])
         video_track.append(v_ci)
         audio_track.append(a_ci)
         cursor = end
     return tree
-# ---- Gradio app ----
-def process_xml_and_transcript(premiere_xml_file, transcript_txt_file, use_llm,
-                               num_segments, custom_keywords, manual_timecodes, natural_instructions,
                                weight_emotion, weight_break, weight_learn, weight_viral):
-    if premiere_xml_file is None:
-        return "Envie o XML do Premiere.", None, f"LLM disponível: {LLM_AVAILABLE}"
     manual_ranges = parse_manual_timecodes(manual_timecodes)
-    has_natural_instructions = natural_instructions.strip() != ""
     if manual_ranges:
         mode = "MANUAL"
         transcript = ""
-    elif has_natural_instructions:
-        mode = "INSTRUÇÕES NATURAIS (IA)"
-        if transcript_txt_file is None:
-            return "Para usar instruções em linguagem natural, envie a transcrição.", None, f"LLM disponível: {LLM_AVAILABLE}"
         if not LLM_AVAILABLE:
-            return "LLM não disponível. Configure GEMINI_API_KEY para usar instruções naturais.", None, f"LLM disponível: {LLM_AVAILABLE}"
-        with open(transcript_txt_file.name, "r", encoding="utf-8") as f:
             transcript = f.read()
     else:
         mode = "AUTOMÁTICO"
-        if transcript_txt_file is None:
-            return "Envie a transcrição em .txt ou forneça minutagens manuais.", None, f"LLM disponível: {LLM_AVAILABLE}"
-        with open(transcript_txt_file.name, "r", encoding="utf-8") as f:
             transcript = f.read()
-    segs = select_segments(transcript, use_llm and LLM_AVAILABLE, num_segments,
-                          custom_keywords, manual_timecodes, natural_instructions,
-                          weight_emotion, weight_break, weight_learn, weight_viral)
-    tree = ET.parse(premiere_xml_file.name)
-    tree = edit_sequence_with_segments(tree, segs)
-    base = os.path.splitext(os.path.basename(premiere_xml_file.name))[0]
-    out_path = os.path.join(OUTPUT_DIR, f"{base}_EDITADO.xml")
-    tree.write(out_path, encoding="utf-8", xml_declaration=True)
-    resumo = f"✂️ {len(segs)} cortes aplicados - Modo: {mode} (24 fps):\n\n"
-    for i, s in enumerate(segs, 1):
-        dur_sec = (s.end_f - s.start_f) / FPS
-        resumo += f"{i}. {s.start_tc} → {s.end_tc} ({dur_sec:.1f}s)\n"
-        if not manual_ranges:
-            resumo += f"   Score: {s.score:.1f} | {s.text[:150]}\n"
-        resumo += "\n"
-    status = f"✓ Modo: {mode} | LLM disponível: {LLM_AVAILABLE} | LLM usado: {use_llm and LLM_AVAILABLE}"
-    return resumo, out_path, status
 css = """
 :root {
-    --neon: #39FF14;
-    --txt: #1a1a1a;
-    --muted: #4b5563;
-    --line: #d1d5db;
 }
 .gradio-container {
-    font-family: 'Manrope', system-ui, sans-serif !important;
-    background: linear-gradient(135deg, rgba(57,255,20,0.03) 0%, #fff 100%);
 }
-.gradio-container h1, .gradio-container h2, .gradio-container h3, .gradio-container label {
-    color: var(--txt) !important;
-    font-weight: 700 !important;
 }
 .gradio-container button.primary {
-    background: var(--neon) !important;
     color: #000 !important;
-    font-weight: 800 !important;
-    border-radius: 10px !important;
-}
-.gradio-container input, .gradio-container textarea {
-    color: var(--txt) !important;
-    border-radius: 12px !important;
-}
-.gradio-container input[type="checkbox"]:checked {
-    background: var(--neon) !important;
 }
 """
 with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
-    gr.HTML("""
-    <link href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;700;800&display=swap" rel="stylesheet">
-    <div style="text-align: center; padding: 24px 0;">
-        <h1 style="color: #1a1a1a; font-weight: 800;">Agente de Edição XML · Premiere</h1>
-        <p style="color: #4b5563;">Edite sua sequência do Premiere com controle total</p>
-    </div>
-    """)
     with gr.Row():
-        with gr.Column():
-            xml_in = gr.File(label="XML da sequência (FCP XML)", file_types=[".xml"])
-            txt_in = gr.File(label="Transcrição (.txt)", file_types=[".txt"])
-        with gr.Column():
-            use_llm = gr.Checkbox(
-                label="Usar Potência Criativa (IA)",
-                value=USE_LLM_DEFAULT and LLM_AVAILABLE
-            )
-            num_segments = gr.Slider(
-                minimum=2, maximum=10, step=1, value=5,
-                label="Número de segmentos"
-            )
-    with gr.Accordion("INSTRUÇÕES EM LINGUAGEM NATURAL (IA)", open=True):
         natural_instructions = gr.Textbox(
-            label="Suas instruções para a IA",
-            placeholder='Exemplos:\n"Separe os 5 melhores momentos"\n"Recorte apenas a parte sobre medo"\n"Remova quando fala almôndega"',
-            lines=4
         )
-    with gr.Accordion("MINUTAGENS MANUAIS", open=False):
         manual_timecodes = gr.Textbox(
-            label="Cole os timecodes exatos",
             placeholder="00:01:23:15 - 00:02:45:10\n00:05:30:00 - 00:07:15:22",
-            lines=5
-        )
-    with gr.Accordion("Palavras-chave Personalizadas", open=False):
-        custom_keywords = gr.Textbox(
-            label="Palavras-chave (separadas por vírgula)",
-            placeholder="transformação, resultado, método"
         )
-    with gr.Accordion("Ajuste de Pesos", open=False):
         with gr.Row():
-            weight_emotion = gr.Slider(0, 5, value=2.0, step=0.1, label="Emoção")
-            weight_break = gr.Slider(0, 5, value=1.5, step=0.1, label="Quebra")
         with gr.Row():
-            weight_learn = gr.Slider(0, 5, value=1.2, step=0.1, label="Aprendizado")
-            weight_viral = gr.Slider(0, 5, value=1.0, step=0.1, label="Viral")
-    run_btn = gr.Button("Processar e Gerar XML Editado", variant="primary", size="lg")
     with gr.Row():
         with gr.Column(scale=2):
-            resumo_out = gr.Textbox(label="Resumo dos cortes", lines=15)
         with gr.Column(scale=1):
             status_out = gr.Textbox(label="Status")
-            file_out = gr.File(label="Download do XML")
     run_btn.click(
-        process_xml_and_transcript,
-        inputs=[xml_in, txt_in, use_llm, num_segments, custom_keywords,
-                manual_timecodes, natural_instructions, weight_emotion, weight_break, weight_learn, weight_viral],
         outputs=[resumo_out, file_out, status_out]
     )

 import re
 import xml.etree.ElementTree as ET
 from dataclasses import dataclass
+from typing import List, Tuple
 import gradio as gr
+# LLM Configuration
 USE_LLM_DEFAULT = True
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
 LLM_AVAILABLE = False
+LLM_MODEL_NAME = "gemini-2.0-flash-exp"
 try:
     if GEMINI_API_KEY:
         import google.generativeai as genai
     text: str
     score: float
+# ============ TIMECODE FUNCTIONS ============
 def parse_timecode_to_frames(tc: str, fps: int = FPS) -> int:
     m = re.match(r"^\s*(\d{2}):(\d{2}):(\d{2})[:;](\d{2})\s*$", tc)
     if not m:
     ff = rem % fps
     return f"{hh:02d}:{mm:02d}:{ss:02d}:{ff:02d}"
+def frames_to_seconds(frames: int, fps: int = FPS) -> float:
+    return frames / fps
+def seconds_to_frames(seconds: float, fps: int = FPS) -> int:
+    return int(seconds * fps)
+# ============ TRANSCRIPT PARSING ============
 def parse_transcript(txt: str) -> List[Segment]:
     lines = [l.strip() for l in txt.splitlines() if l.strip()]
     results: List[Segment] = []
     pat_range = re.compile(r"^\[?\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*[-—]\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*\]?\s+(.*)$")
     pat_point = re.compile(r"^(\d{2}:\d{2}:\d{2}[:;]\d{2})\s+(.*)$")
     for l in lines:
         m = pat_range.match(l)
         if m:
             except Exception:
                 continue
             continue
         m = pat_point.match(l)
         if m:
             s, text = m.groups()
                 results.append(Segment(s, e, s_f, e_f, text, 0.0))
             except Exception:
                 continue
+    return results
+# ============ MANUAL TIMECODES ============
+def parse_manual_timecodes(manual_input: str) -> List[Tuple[str, str]]:
     manual_ranges = []
     normalized = manual_input.replace(",", "\n")
     lines = [l.strip() for l in normalized.splitlines() if l.strip()]
     return manual_ranges
+# ============ SEGMENT PROCESSING ============
+def get_total_duration(segs: List[Segment]) -> float:
+    """Retorna duração total em segundos"""
+    return sum((s.end_f - s.start_f) / FPS for s in segs)
+def create_target_selection(segs: List[Segment], target_minutes: float, strategy: str = "distributed") -> List[Segment]:
+    """
+    Cria uma seleção de segmentos para atingir duração alvo.
+    strategy: 'distributed' = espalhado pelo vídeo, 'sequential' = em sequência
+    """
+    target_seconds = target_minutes * 60
+    total_available = get_total_duration(segs)
+    if target_seconds > total_available:
+        print(f"Aviso: Duração solicitada ({target_minutes:.1f}min) maior que disponível ({total_available/60:.1f}min)")
+        return segs
+    if strategy == "distributed":
+        # Distribui seleção ao longo do vídeo
+        ratio = target_seconds / total_available
+        selected = []
+        current_duration = 0
+        # Seleciona proporcionalmente de cada parte
+        for seg in segs:
+            if current_duration >= target_seconds:
+                break
+            seg_duration = (seg.end_f - seg.start_f) / FPS
+            if ratio >= 0.8 or (current_duration + seg_duration <= target_seconds * 1.1):
+                selected.append(seg)
+                current_duration += seg_duration
+        return selected
+    else:  # sequential
+        selected = []
+        current_duration = 0
+        for seg in segs:
+            if current_duration >= target_seconds:
+                break
+            selected.append(seg)
+            current_duration += (seg.end_f - seg.start_f) / FPS
+        return selected
def merge_close_segments(segs: List[Segment], max_gap_seconds: float = 3.0) -> List[Segment]:
    """Merge segments that overlap or sit close together on the source timeline.

    Segments are sorted by start frame. Each one is folded into the previous
    output segment when the gap between them is at most ``max_gap_seconds``;
    overlapping or contained segments (negative gap) are folded as well, so the
    same source frames are never emitted twice.

    Args:
        segs: Segments to merge; the input list is not modified.
        max_gap_seconds: Largest gap, in seconds, that is still bridged.

    Returns:
        A new list of segments ordered by start frame. A merged segment keeps
        the earliest start, the latest end, joins the texts with ``" [...] "``
        and averages the two scores.
    """
    if not segs:
        return []

    ordered = sorted(segs, key=lambda seg: seg.start_f)
    max_gap_frames = int(max_gap_seconds * FPS)
    merged = [ordered[0]]

    for current in ordered[1:]:
        last = merged[-1]
        if current.start_f - last.end_f > max_gap_frames:
            merged.append(current)
            continue

        # A segment fully contained in the previous one must not pull the end
        # of the merged range backwards, so keep whichever end is later.
        extends = current.end_f >= last.end_f
        merged[-1] = Segment(
            start_tc=last.start_tc,
            end_tc=current.end_tc if extends else last.end_tc,
            start_f=last.start_f,
            end_f=current.end_f if extends else last.end_f,
            text=last.text + " [...] " + current.text,
            score=(last.score + current.score) / 2,
        )
    return merged
+# ============ AI PROCESSING ============
def extract_duration_from_instructions(instructions: str) -> "float | None":
    """Extract a target duration, in minutes, from free-form user instructions.

    Recognises an integer or decimal number (``.`` or ``,`` as the decimal
    separator) followed by ``minuto(s)``, ``min``, ``minute(s)`` or ``m``,
    case-insensitively. The unit patterns are tried in that order and the
    first pattern that matches anywhere in the text wins.

    Args:
        instructions: Text typed by the user, e.g. ``"resumo de 2,5 minutos"``.

    Returns:
        The duration in minutes as a float, or ``None`` when the text does not
        mention a duration.
    """
    # Capturing the optional fractional part keeps "2.5 min" from being read
    # as "5 min", which is what a bare ``\d+`` would match.
    number = r"(\d+(?:[.,]\d+)?)"
    unit_patterns = (
        r"\s*minutos?",
        r"\s*min\b",
        r"\s*minutes?",
        r"m\b",
    )
    text = (instructions or "").lower()
    for unit in unit_patterns:
        match = re.search(number + unit, text)
        if match:
            return float(match.group(1).replace(",", "."))
    return None
+def ai_select_segments(segs: List[Segment], instructions: str) -> List[Segment]:
+    """Usa IA para selecionar segmentos baseado em instruções"""
+    if not LLM_AVAILABLE:
+        raise ValueError("IA não disponível. Configure GEMINI_API_KEY")
+    total_duration_min = get_total_duration(segs) / 60
+    target_duration = extract_duration_from_instructions(instructions)
+    # Cria resumo dos segmentos (agrupados para prompt menor)
+    segment_summary = []
+    for i in range(0, len(segs), 5):
+        group = segs[i:i+5]
+        start_tc = group[0].start_tc
+        end_tc = group[-1].end_tc
+        duration = sum((s.end_f - s.start_f) / FPS for s in group)
+        combined_text = " ".join([s.text[:100] for s in group])
+        segment_summary.append(f"Grupo {i//5}: [{start_tc}-{end_tc}] ({duration:.0f}s) {combined_text[:200]}")
+    prompt = f"""Você é um editor de vídeo profissional.
 INSTRUÇÕES DO USUÁRIO:
+{instructions}
+INFORMAÇÕES:
+- Total disponível: {total_duration_min:.1f} minutos ({len(segs)} segmentos)
+- Duração alvo detectada: {target_duration if target_duration else 'não especificada'} minutos
+SEGMENTOS (agrupados de 5 em 5):
+{chr(10).join(segment_summary[:50])}
 TAREFA:
+1. Identifique quais GRUPOS de segmentos atendem às instruções
+2. Se foi solicitada duração específica, selecione grupos suficientes para atingi-la
+3. Distribua a seleção: pegue grupos do INÍCIO, MEIO e FIM do vídeo
+4. Retorne os NÚMEROS dos grupos selecionados
+RESPONDA APENAS com números separados por vírgula (ex: 0,2,5,8,12,15,20,25,30)
+Selecione pelo menos 10-20 grupos para ter duração adequada."""
     try:
+        response = LLM.generate_content(prompt, generation_config={"temperature": 0.4, "max_output_tokens": 500})
         txt = (response.text or "").strip()
+        # Extrai números dos grupos
+        group_indices = [int(x) for x in re.findall(r"\d+", txt)]
+        # Converte grupos em segmentos individuais
+        selected_segs = []
+        for group_idx in group_indices:
+            start_idx = group_idx * 5
+            end_idx = min(start_idx + 5, len(segs))
+            if start_idx < len(segs):
+                selected_segs.extend(segs[start_idx:end_idx])
+        if not selected_segs:
+            # Fallback: pega distribuído
+            step = max(1, len(segs) // 30)
+            selected_segs = segs[::step]
+        # Remove duplicatas e ordena
+        seen = set()
+        unique_segs = []
+        for seg in selected_segs:
+            key = (seg.start_f, seg.end_f)
+            if key not in seen:
+                seen.add(key)
+                unique_segs.append(seg)
+        unique_segs.sort(key=lambda x: x.start_f)
+        # Ajusta para duração alvo se especificada
+        if target_duration:
+            unique_segs = create_target_selection(unique_segs, target_duration, "distributed")
+        # Mescla segmentos próximos
+        final_segs = merge_close_segments(unique_segs, max_gap_seconds=3.0)
+        final_duration = get_total_duration(final_segs)
+        print(f"✓ Selecionados {len(final_segs)} trechos, duração total: {final_duration/60:.1f} min")
+        return final_segs
     except Exception as e:
+        print(f"Erro na IA: {e}")
+        raise
+# ============ KEYWORD SCORING ============
def keyword_score(text: str, custom_keywords: str = "", weights: dict = None) -> float:
    """Score a transcript line by the keywords it contains.

    Each built-in keyword found in the lower-cased text adds the weight of its
    category, each custom keyword found adds 3.0, and every ``!`` adds 0.2.
    Matching is by substring.

    Args:
        text: Transcript text to score.
        custom_keywords: Comma-separated extra keywords; blank entries (for
            example from a trailing comma) are ignored.
        weights: Optional per-category weights keyed by ``"emotion"``,
            ``"break"``, ``"learn"`` and ``"viral"``. Missing keys fall back
            to the defaults 2.0, 1.5, 1.2 and 1.0.

    Returns:
        The accumulated score as a float.
    """
    default_weights = {"emotion": 2.0, "break": 1.5, "learn": 1.2, "viral": 1.0}
    effective = {**default_weights, **(weights or {})}

    lowered = text.lower()
    categories = (
        ("emotion", ["medo", "coragem", "raiva", "chorei", "feliz", "triste", "emocion"]),
        ("break", ["nunca", "de repente", "contraintuitivo", "virada"]),
        ("learn", ["aprendi", "descobri", "lição", "entendi", "percebi"]),
        ("viral", ["segredo", "verdade", "3 passos", "como eu"]),
    )

    score = 0.0
    for category, keywords in categories:
        weight = effective[category]
        for kw in keywords:
            if kw in lowered:
                score += weight

    for raw in (custom_keywords or "").split(","):
        kw = raw.strip().lower()
        # An empty string is a substring of every text, so blank entries must
        # be skipped or each one would add a spurious bonus.
        if kw and kw in lowered:
            score += 3.0

    score += 0.2 * text.count("!")
    return score
+# ============ MAIN SELECTION LOGIC ============
 def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
                    custom_keywords: str, manual_timecodes: str, natural_instructions: str,
                    weight_emotion: float, weight_break: float,
                    weight_learn: float, weight_viral: float) -> List[Segment]:
+    # Priority 1: Manual timecodes
     manual_ranges = parse_manual_timecodes(manual_timecodes)
     if manual_ranges:
         result_segs = []
                 end_f = parse_timecode_to_frames(end_tc)
                 if end_f > start_f:
                     result_segs.append(Segment(
+                        start_tc=start_tc, end_tc=end_tc,
+                        start_f=start_f, end_f=end_f,
+                        text=f"Manual: {start_tc} - {end_tc}",
                         score=100.0
                     ))
             except Exception as e:
+                print(f"Erro: {e}")
+        return result_segs if result_segs else []
+    # Priority 2: AI with natural instructions
     segs = parse_transcript(transcript_txt)
     if not segs:
+        raise ValueError("Nenhum trecho encontrado na transcrição")
+    if natural_instructions.strip() and use_llm and LLM_AVAILABLE:
+        return ai_select_segments(segs, natural_instructions)
+    # Priority 3: Automatic scoring
+    weights = {
+        "emotion": weight_emotion,
+        "break": weight_break,
+        "learn": weight_learn,
+        "viral": weight_viral
+    }
+    for s in segs:
+        s.score = keyword_score(s.text, custom_keywords, weights)
+    segs.sort(key=lambda x: x.score, reverse=True)
+    return segs[:num_segments]
+# ============ XML EDITING ============
 def get_sequence(root: ET.Element) -> ET.Element:
     seq = root.find(".//sequence")
     if seq is None:
+        raise ValueError("Nenhuma <sequence> encontrada")
     return seq
 def ensure_rate_24fps(element: ET.Element):
     if tb is None:
         tb = ET.SubElement(rate, "timebase")
     tb.text = str(FPS)
 def deep_copy(elem: ET.Element) -> ET.Element:
     new = ET.Element(elem.tag, attrib=elem.attrib)
         new.append(deep_copy(child))
     return new
+def build_clipitem(template_ci: ET.Element, cid: str, start_f: int, end_f: int,
+                   in_f: int, out_f: int, linked_ids):
     ci = ET.Element("clipitem", {"id": cid})
     name = template_ci.find("name")
     ci_name = ET.SubElement(ci, "name")
         t = ET.SubElement(ci, tag)
         t.text = str(val)
+    src_file = template_ci.find("./file")
+    if src_file is not None:
+        ci.append(deep_copy(src_file))
     for lid in linked_ids:
         link = ET.SubElement(ci, "link")
         linkclipref = ET.SubElement(link, "linkclipref")
         linkclipref.text = lid
     return ci
 def edit_sequence_with_segments(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
     """Replace the clips of the first video and audio track with the given cuts.

     The first ``<clipitem>`` of each track is used as the template for the new
     clips. All existing clipitems are removed from both tracks, then one
     linked video/audio pair is appended per segment, laid back to back from
     frame 0 of the sequence.

     Args:
         tree: Parsed XML tree containing a ``<sequence>``; modified in place.
         segs: Segments to place on the timeline, in the order given.

     Returns:
         The same ``tree`` object.

     Raises:
         ValueError: If the video or audio track is missing, or if there are
             segments to place but a track has no clipitem to use as template.
     """
     root = tree.getroot()
     seq = get_sequence(root)
     video_track = seq.find("./media/video/track")
     audio_track = seq.find("./media/audio/track")
     # Compare with None explicitly: an Element without children is falsy, so
     # a truthiness test would reject a track that exists but is empty.
     if video_track is None or audio_track is None:
         raise ValueError("Estrutura de trilhas não encontrada")
     v_tpl = video_track.find("./clipitem")
     a_tpl = audio_track.find("./clipitem")
     # Fail with a clear message instead of an AttributeError inside
     # build_clipitem when a track has nothing to copy from.
     if segs and (v_tpl is None or a_tpl is None):
         raise ValueError("Não há clipitem de referência em V1 e/ou A1")
     # Clear both tracks; findall returns a list, so removing while looping is safe.
     for track in (video_track, audio_track):
         for ci in track.findall("./clipitem"):
             track.remove(ci)
     # Append the new clips back to back.
     cursor = 0
     for idx, s in enumerate(segs, start=1):
         dur = s.end_f - s.start_f
         start, end = cursor, cursor + dur
         v_id = f"clip-v-{idx}"
         a_id = f"clip-a-{idx}"
         v_ci = build_clipitem(v_tpl, v_id, start, end, s.start_f, s.end_f, [a_id])
         a_ci = build_clipitem(a_tpl, a_id, start, end, s.start_f, s.end_f, [v_id])
         video_track.append(v_ci)
         audio_track.append(a_ci)
         cursor = end
     return tree
+# ============ GRADIO INTERFACE ============
def _upload_path(upload):
    """Return the filesystem path of an uploaded file.

    Accepts either a plain path string or an object that exposes the path as
    ``.name``; anything without a ``name`` attribute is returned unchanged.
    """
    return getattr(upload, "name", upload)


def process_xml_and_transcript(xml_file, txt_file, use_llm, num_segments,
                               custom_keywords, manual_timecodes, natural_instructions,
                               weight_emotion, weight_break, weight_learn, weight_viral):
    """Gradio callback: pick segments and write the edited sequence XML.

    The mode is chosen in priority order: manual timecodes ("MANUAL"), then
    natural-language instructions ("IA (Linguagem Natural)"), otherwise
    "AUTOMÁTICO". The last two require a transcript file; the instructions
    mode additionally requires ``LLM_AVAILABLE``.

    Args:
        xml_file: Uploaded sequence XML (path string or object with ``.name``).
        txt_file: Uploaded transcript, same representation; unused in manual mode.
        use_llm: UI checkbox value; combined with ``LLM_AVAILABLE`` before
            being passed to ``select_segments``.
        num_segments, custom_keywords, manual_timecodes, natural_instructions,
        weight_emotion, weight_break, weight_learn, weight_viral: Forwarded
            unchanged to ``select_segments``.

    Returns:
        A ``(summary, output_path, status)`` tuple. On a validation failure or
        any exception the summary holds a "❌" message and ``output_path`` is
        ``None``.
    """
    llm_status = f"LLM: {LLM_AVAILABLE}"
    if not xml_file:
        return "❌ Envie o XML do Premiere", None, llm_status

    manual_ranges = parse_manual_timecodes(manual_timecodes)
    # Accept None as well as "" so an empty textbox value cannot raise here.
    has_instructions = bool((natural_instructions or "").strip())

    # Determine mode and validate its prerequisites.
    if manual_ranges:
        mode = "MANUAL"
    elif has_instructions:
        mode = "IA (Linguagem Natural)"
        if not txt_file:
            return "❌ Envie a transcrição para usar IA", None, llm_status
        if not LLM_AVAILABLE:
            return "❌ IA não disponível. Configure GEMINI_API_KEY", None, "LLM: False"
    else:
        mode = "AUTOMÁTICO"
        if not txt_file:
            return "❌ Envie a transcrição", None, llm_status

    try:
        # Read the transcript inside the error boundary so I/O and decoding
        # problems are reported through the summary like every other failure.
        transcript = ""
        if not manual_ranges:
            with open(_upload_path(txt_file), "r", encoding="utf-8") as f:
                transcript = f.read()

        segs = select_segments(transcript, use_llm and LLM_AVAILABLE, num_segments,
                               custom_keywords, manual_timecodes, natural_instructions,
                               weight_emotion, weight_break, weight_learn, weight_viral)
        if not segs:
            return "❌ Nenhum segmento selecionado", None, llm_status

        xml_path = _upload_path(xml_file)
        tree = edit_sequence_with_segments(ET.parse(xml_path), segs)
        base = os.path.splitext(os.path.basename(xml_path))[0]
        out_path = os.path.join(OUTPUT_DIR, f"{base}_EDITADO.xml")
        tree.write(out_path, encoding="utf-8", xml_declaration=True)

        total_duration = get_total_duration(segs)
        parts = [f"✂️ {len(segs)} cortes | Duração: {total_duration/60:.1f} min | Modo: {mode}\n\n"]
        for i, s in enumerate(segs, 1):
            dur = (s.end_f - s.start_f) / FPS
            parts.append(f"{i}. {s.start_tc} → {s.end_tc} ({dur:.1f}s)\n")
            # The text preview is skipped in manual mode.
            if s.text and not manual_ranges:
                # Only mark the preview as truncated when it really was.
                suffix = "..." if len(s.text) > 120 else ""
                parts.append(f"   {s.text[:120]}{suffix}\n")
            parts.append("\n")
        resumo = "".join(parts)

        status = f"✓ {mode} | Duração total: {total_duration/60:.1f} min | LLM: {LLM_AVAILABLE}"
        return resumo, out_path, status
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing the callback.
        return f"❌ Erro: {str(e)}", None, llm_status
+# ============ CSS ============
 # Custom stylesheet passed to gr.Blocks(css=css) below. It defines three CSS
 # variables on :root (--primary, --text, --muted), sets the container font,
 # colors h1/label text with --text and styles the primary button with
 # --primary. --muted is defined but not referenced inside this stylesheet.
 css = """
 :root {
+    --primary: #39FF14;
+    --text: #1a1a1a;
+    --muted: #6b7280;
 }
 .gradio-container {
+    font-family: system-ui, sans-serif !important;
 }
+.gradio-container h1, .gradio-container label {
+    color: var(--text) !important;
 }
 .gradio-container button.primary {
+    background: var(--primary) !important;
     color: #000 !important;
+    font-weight: 700 !important;
 }
 """
+# ============ GRADIO APP ============
 # Build the UI. Components are created in display order inside the Blocks
 # context, so the statement order below is also the page layout.
 with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
     gr.Markdown("# Agente de Edição XML - Premiere Pro")
     gr.Markdown("Edite sequências do Premiere com IA ou controle manual")
     # Inputs: the sequence XML and the transcript.
     with gr.Row():
         xml_in = gr.File(label="XML do Premiere", file_types=[".xml"])
         txt_in = gr.File(label="Transcrição (.txt)", file_types=[".txt"])
     with gr.Row():
         # The checkbox starts ticked only when the LLM is actually available.
         use_llm = gr.Checkbox(label="Usar IA", value=USE_LLM_DEFAULT and LLM_AVAILABLE)
         num_segments = gr.Slider(2, 20, 5, step=1, label="Segmentos (modo automático)")
     # Natural-language mode (open by default).
     with gr.Accordion("IA - Linguagem Natural (RECOMENDADO)", open=True):
         gr.Markdown("""
 **Use linguagem natural para dar instruções:**
 - "Crie um corte de 10 minutos com os melhores momentos"
 - "Extraia 15 minutos das partes mais engraçadas"
 - "Faça um resumo de 5 minutos sobre superação"
         """)
         natural_instructions = gr.Textbox(
             label="Instruções para a IA",
             placeholder='Ex: "Crie um corte de 10 minutos com os melhores momentos distribuídos pelo vídeo"',
             lines=3
         )
     # Manual mode: explicit timecode ranges, one per line.
     with gr.Accordion("Minutagens Manuais", open=False):
         manual_timecodes = gr.Textbox(
             label="Timecodes exatos (um por linha)",
             placeholder="00:01:23:15 - 00:02:45:10\n00:05:30:00 - 00:07:15:22",
             lines=4
         )
     # Automatic mode: keyword list plus the four scoring weights.
     with gr.Accordion("Modo Automático (Palavras-chave)", open=False):
         custom_keywords = gr.Textbox(label="Palavras-chave personalizadas (separadas por vírgula)")
         # step is passed by keyword, as for num_segments above, so these calls
         # do not depend on Slider's positional parameter order.
         with gr.Row():
             weight_emotion = gr.Slider(0, 5, 2.0, step=0.1, label="Peso: Emoção")
             weight_break = gr.Slider(0, 5, 1.5, step=0.1, label="Peso: Quebra")
         with gr.Row():
             weight_learn = gr.Slider(0, 5, 1.2, step=0.1, label="Peso: Aprendizado")
             weight_viral = gr.Slider(0, 5, 1.0, step=0.1, label="Peso: Viral")
     run_btn = gr.Button("Processar XML", variant="primary", size="lg")
     # Outputs: summary text on the left, status and download on the right.
     with gr.Row():
         with gr.Column(scale=2):
             resumo_out = gr.Textbox(label="Resumo", lines=15)
         with gr.Column(scale=1):
             status_out = gr.Textbox(label="Status")
             file_out = gr.File(label="Download")
     # The inputs list follows process_xml_and_transcript's parameter order and
     # the outputs list follows its (summary, file path, status) return order.
     run_btn.click(
         process_xml_and_transcript,
         inputs=[xml_in, txt_in, use_llm, num_segments, custom_keywords,
                 manual_timecodes, natural_instructions,
                 weight_emotion, weight_break, weight_learn, weight_viral],
         outputs=[resumo_out, file_out, status_out]
     )