Spaces:

leicam
/

EditorAutomaticoXML

Running

App Files Community

leicam committed on Oct 8, 2025

Commit

b10dfca

verified ·

1 Parent(s): c7bfbda

Update app.py

Browse files

Files changed (1) hide show

app.py +193 -261

app.py CHANGED Viewed

@@ -3,13 +3,12 @@ import re
 import json
 import xml.etree.ElementTree as ET
 from dataclasses import dataclass
-from typing import List, Tuple, Optional
 import gradio as gr
 # =========================
 # Configurações Gerais
 # =========================
-FPS = 24
 OUTPUT_DIR = "./Output"
 os.makedirs(OUTPUT_DIR, exist_ok=True)
@@ -50,7 +49,7 @@ class Segment:
 # =========================
 # Funções de Timecode
 # =========================
-def _tc_to_hmsf(tc: str, fps: int = FPS) -> Tuple[int, int, int, int]:
     """Converte timecode para (hh, mm, ss, ff)."""
     s = tc.strip()
@@ -76,12 +75,12 @@ def _tc_to_hmsf(tc: str, fps: int = FPS) -> Tuple[int, int, int, int]:
     raise ValueError(f"Timecode inválido: {tc}")
-def parse_timecode_to_frames(tc: str, fps: int = FPS) -> int:
     hh, mm, ss, ff = _tc_to_hmsf(tc, fps)
     return hh * 3600 * fps + mm * 60 * fps + ss * fps + ff
-def frames_to_timecode(frames: int, fps: int = FPS) -> str:
     hh = frames // (3600 * fps)
     rem = frames % (3600 * fps)
     mm = rem // (60 * fps)
@@ -94,8 +93,8 @@ def frames_to_timecode(frames: int, fps: int = FPS) -> str:
 # =========================
 # Parser de Transcrição
 # =========================
-def parse_transcript(txt: str) -> List[Segment]:
-    """Parser robusto para múltiplos formatos."""
     if not txt or not txt.strip():
         return []
@@ -137,12 +136,12 @@ def parse_transcript(txt: str) -> List[Segment]:
             text = " ".join(text_parts).strip()
             try:
-                sf = parse_timecode_to_frames(start_tc)
-                ef = parse_timecode_to_frames(end_tc)
                 if ef > sf:
                     results.append(Segment(
-                        start_tc=frames_to_timecode(sf),
-                        end_tc=frames_to_timecode(ef),
                         start_f=sf,
                         end_f=ef,
                         text=text if text else f"{start_tc} - {end_tc}",
@@ -153,6 +152,7 @@ def parse_transcript(txt: str) -> List[Segment]:
             i += 1
             continue
         if arrow.search(raw) or (i + 1 < len(lines) and arrow.search(lines[i + 1])):
             line_with_tc = raw if arrow.search(raw) else lines[i + 1]
             mm = arrow.search(line_with_tc)
@@ -173,12 +173,12 @@ def parse_transcript(txt: str) -> List[Segment]:
                 text = " ".join(text_parts).strip()
                 try:
-                    sf = parse_timecode_to_frames(start_tc)
-                    ef = parse_timecode_to_frames(end_tc)
                     if ef > sf:
                         results.append(Segment(
-                            start_tc=frames_to_timecode(sf),
-                            end_tc=frames_to_timecode(ef),
                             start_f=sf,
                             end_f=ef,
                             text=text,
@@ -204,7 +204,10 @@ def parse_manual_timecodes(manual_input: str) -> List[Tuple[str, str]]:
     manual_ranges = []
     lines = manual_input.replace(",", "\n").splitlines()
-    pattern = re.compile(r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*[-–—]\s*(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)')
     for line in lines:
         m = pattern.search(line.strip())
         if m:
@@ -215,35 +218,38 @@ def parse_manual_timecodes(manual_input: str) -> List[Tuple[str, str]]:
 # =========================
 # IA: Análise Inteligente com Gemini
 # =========================
-def ai_analyze_and_select(segments: List[Segment], command: str, progress_callback=None) -> List[Segment]:
     """
     Usa Gemini para analisar a transcrição completa e identificar os melhores trechos.
     Processo em 2 etapas para máxima precisão.
     """
     if not LLM_AVAILABLE or not segments:
         raise ValueError("IA não disponível ou sem segmentos para analisar")
     if progress_callback:
-        progress_callback("🤖 Etapa 1/3: Preparando dados para análise...")
     # Prepara a transcrição completa com índices
     transcript_data = []
     for i, seg in enumerate(segments):
-        duration_sec = (seg.end_f - seg.start_f) / FPS
         transcript_data.append({
             "index": i,
             "timecode": seg.start_tc,
             "duration_sec": round(duration_sec, 1),
-            "text": seg.text[:200]  # Limita texto para não estourar tokens
         })
-    # Converte para JSON para análise estruturada
     transcript_json = json.dumps(transcript_data, ensure_ascii=False, indent=2)
     if progress_callback:
-        progress_callback(f"🤖 Etapa 2/3: Analisando {len(segments)} segmentos com IA (pode levar 30-60s)...")
-    # Prompt detalhado para análise completa
     prompt = f"""Você é um especialista em edição de vídeo. Analise a transcrição e identifique os MELHORES trechos baseado no comando do usuário.
 COMANDO DO USUÁRIO:
@@ -267,19 +273,17 @@ INSTRUÇÕES:
     {{
       "start_index": <índice do segmento inicial>,
       "duration_seconds": <duração desejada em segundos>,
-      "reason": "<breve explicação de por que escolheu este trecho>"
     }}
   ]
 }}
 IMPORTANTE:
-- Seja PRECISO na identificação dos trechos
-- Considere o contexto completo ao redor das palavras-chave
-- Se o comando pedir "sobre X", encontre onde X é realmente discutido
 - Se houver timecode, priorize começar próximo a ele
-- Retorne APENAS o JSON, sem texto adicional
-Responda com o JSON:"""
     try:
         response = LLM.generate_content(
@@ -289,63 +293,59 @@ Responda com o JSON:"""
                 "max_output_tokens": 2000,
             }
         )
-        response_text = response.text.strip()
         if progress_callback:
-            progress_callback("🤖 Etapa 3/3: Processando resposta da IA...")
-        # Extrai JSON da resposta
         json_match = re.search(r'\{[\s\S]*"cuts"[\s\S]*\}', response_text)
         if not json_match:
             raise ValueError("IA não retornou JSON válido")
         result = json.loads(json_match.group(0))
         cuts_data = result.get("cuts", [])
         if not cuts_data:
             raise ValueError("IA não encontrou cortes adequados")
-        # Cria os segmentos baseado na análise da IA
-        selected_segments = []
         for cut_info in cuts_data:
-            start_idx = cut_info.get("start_index", 0)
-            duration_sec = cut_info.get("duration_seconds", 60)
-            reason = cut_info.get("reason", "")
             if start_idx < 0 or start_idx >= len(segments):
                 continue
             start_seg = segments[start_idx]
             start_frame = start_seg.start_f
-            duration_frames = int(duration_sec * FPS)
             end_frame = start_frame + duration_frames
             # Coleta texto dos segmentos envolvidos
-            text_parts = [f"[IA: {reason}]"] if reason else []
             for seg in segments[start_idx:]:
                 if seg.start_f < end_frame:
                     if seg.text:
                         text_parts.append(seg.text[:150])
                 else:
                     break
             combined_text = " [...] ".join(text_parts)[:500]
             selected_segments.append(Segment(
-                start_tc=frames_to_timecode(start_frame),
-                end_tc=frames_to_timecode(end_frame),
                 start_f=start_frame,
                 end_f=end_frame,
                 text=combined_text,
                 score=100.0
             ))
         return selected_segments
     except json.JSONDecodeError as e:
-        raise ValueError(f"Erro ao processar resposta da IA (JSON inválido): {str(e)}\nResposta: {response_text[:300]}")
     except Exception as e:
         raise ValueError(f"Erro na análise da IA: {str(e)}")
@@ -353,19 +353,17 @@ Responda com o JSON:"""
 # =========================
 # Processamento com Comando Manual (sem IA)
 # =========================
-def manual_command_processing(segments: List[Segment], command: str) -> List[Segment]:
-    """
-    Fallback: processamento básico sem IA para comandos simples.
-    """
-    s = command.lower()
-    # Extrai quantidade
     count = 1
     m = re.search(r'(\d+)\s*(?:cortes?|clipes?|segmentos?)', s)
     if m:
         count = int(m.group(1))
-    # Extrai duração
     duration_sec = 60
     m = re.search(r'(\d+)\s*(?:segundos?|s\b)', s)
     if m:
@@ -374,44 +372,43 @@ def manual_command_processing(segments: List[Segment], command: str) -> List[Seg
         m = re.search(r'(\d+)\s*(?:minutos?|min\b)', s)
         if m:
             duration_sec = int(m.group(1)) * 60
-    # Extrai timecode inicial
     start_frame = 0
     m = re.search(r'(?:começando|a partir de)\s+(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)', s)
     if m:
         try:
-            start_frame = parse_timecode_to_frames(m.group(1))
-        except:
             pass
-    # Cria cortes contínuos
     results = []
     base_frame = start_frame
     for i in range(count):
-        duration_frames = duration_sec * FPS
         end_frame = base_frame + duration_frames
-        # Coleta texto
         text_parts = []
         for seg in segments:
             if seg.start_f >= base_frame and seg.start_f < end_frame:
                 if seg.text:
                     text_parts.append(seg.text[:100])
         combined_text = " [...] ".join(text_parts[:10])[:400]
         results.append(Segment(
-            start_tc=frames_to_timecode(base_frame),
-            end_tc=frames_to_timecode(end_frame),
             start_f=base_frame,
             end_f=end_frame,
             text=combined_text if combined_text else f"Corte {i+1}",
             score=50.0
         ))
         base_frame = end_frame
     return results
@@ -427,42 +424,42 @@ def auto_score_segments(
     weight_learn: float,
     weight_viral: float
 ) -> List[Segment]:
-    """Sistema de pontuação automática."""
     emotion_words = ['medo', 'coragem', 'amor', 'ódio', 'paixão', 'alegria', 'tristeza']
     break_words = ['nunca', 'de repente', 'surpreendente', 'inesperado', 'incrível']
     learn_words = ['aprendi', 'descobri', 'entendi', 'percebi', 'lição']
     viral_words = ['segredo', 'verdade', 'revelação', 'exclusivo', 'confissão']
     for s in segs:
         score = 0.0
         text = (s.text or "").lower()
         for word in emotion_words:
             if word in text:
                 score += weight_emotion
         for word in break_words:
             if word in text:
                 score += weight_break
         for word in learn_words:
             if word in text:
                 score += weight_learn
         for word in viral_words:
             if word in text:
                 score += weight_viral
         if custom_keywords:
             for kw in custom_keywords.split(","):
                 kw_clean = kw.strip().lower()
                 if kw_clean and kw_clean in text:
                     score += 5.0
         s.score = score
     segs.sort(key=lambda x: x.score, reverse=True)
-    return segs[:num_segments]
 # =========================
@@ -553,7 +550,8 @@ def select_segments(
     weight_break: float,
     weight_learn: float,
     weight_viral: float,
-    progress_callback=None
 ) -> List[Segment]:
     # 1) Manual
@@ -563,10 +561,10 @@ def select_segments(
         for start_tc, end_tc in manual:
             try:
                 result.append(Segment(
-                    start_tc=frames_to_timecode(parse_timecode_to_frames(start_tc)),
-                    end_tc=frames_to_timecode(parse_timecode_to_frames(end_tc)),
-                    start_f=parse_timecode_to_frames(start_tc),
-                    end_f=parse_timecode_to_frames(end_tc),
                     text=f"Manual: {start_tc} - {end_tc}",
                     score=100.0
                 ))
@@ -575,22 +573,23 @@ def select_segments(
         return result
     # 2) Parser de transcrição
-    segs = parse_transcript(transcript_txt) if transcript_txt else []
-    # 3) Linguagem natural COM IA
     if natural_instructions.strip():
         if use_llm and LLM_AVAILABLE and segs:
-            # USA IA PARA ANÁLISE COMPLETA
-            return ai_analyze_and_select(segs, natural_instructions, progress_callback)
         elif segs:
-            # Fallback sem IA
-            return manual_command_processing(segs, natural_instructions)
         else:
-            raise ValueError("Para usar comandos em linguagem natural, forneça uma transcrição ou ative as minutagens manuais.")
     # 4) Automático
     if not segs:
-        raise ValueError("Nenhum segmento encontrado. Forneça uma transcrição, minutagens ou um comando em linguagem natural.")
     return auto_score_segments(
         segs, num_segments, custom_keywords,
         weight_emotion, weight_break, weight_learn, weight_viral
@@ -604,255 +603,188 @@ def process_files(
     xml_file, txt_file, use_llm, num_segments,
     custom_keywords, manual_timecodes, natural_instructions,
     weight_emotion, weight_break, weight_learn, weight_viral,
     progress=gr.Progress()
 ):
     if not xml_file:
-        return "⚠️ Envie o XML do Premiere", None, f"LLM: {'✓' if LLM_AVAILABLE else '✗'}"
     try:
         debug_info = []
         def progress_callback(msg):
             progress(0.5, desc=msg)
             debug_info.append(msg)
-        progress(0.1, desc="📂 Carregando arquivos...")
         transcript = ""
         manual = parse_manual_timecodes(manual_timecodes)
         if not manual and txt_file:
             with open(txt_file.name, "r", encoding="utf-8-sig") as f:
                 transcript = f.read()
-            debug_info.append(f"📄 Transcrição: {len(transcript)} caracteres")
-        progress(0.2, desc="🔍 Selecionando segmentos...")
         segments = select_segments(
-            transcript, use_llm and LLM_AVAILABLE, num_segments,
             custom_keywords, manual_timecodes, natural_instructions,
-            weight_emotion, weight_break, weight_learn, weight_viral,
             progress_callback
         )
         if not segments:
-            return "⚠️ Nenhum segmento selecionado", None, f"LLM: {'✓' if LLM_AVAILABLE else '✗'}"
         valid_segments = []
         for seg in segments:
-            if seg.end_f > seg.start_f and seg.end_f - seg.start_f >= FPS:
                 valid_segments.append(seg)
         if not valid_segments:
-            return "⚠️ Segmentos inválidos (duração muito curta)", None, f"LLM: {'✓' if LLM_AVAILABLE else '✗'}"
         segments = valid_segments
-        debug_info.append(f"✓ {len(segments)} segmento(s) válido(s)")
-        progress(0.7, desc="✂️ Editando XML...")
         tree = ET.parse(xml_file.name)
         tree = edit_xml(tree, segments)
         basename = os.path.splitext(os.path.basename(xml_file.name))[0]
-        output = os.path.join(OUTPUT_DIR, f"{basename}_EDITADO.xml")
-        tree.write(output, encoding="utf-8", xml_declaration=True)
-        progress(0.9, desc="📊 Gerando resumo...")
-        total_sec = sum((s.end_f - s.start_f) / FPS for s in segments)
         total_min = total_sec / 60.0
         if manual:
-            mode = "🎯 MANUAL"
         elif natural_instructions.strip() and use_llm and LLM_AVAILABLE:
-            mode = "🤖 IA COMPLETA (Gemini)"
         elif natural_instructions.strip():
-            mode = "📐 BÁSICO (sem IA)"
         else:
-            mode = "⚙️ AUTOMÁTICO"
         summary_lines = [
-            "═" * 70,
-            f"✨ RESULTADO: {len(segments)} corte(s) | {total_min:.1f} min total",
-            f"📊 Modo: {mode}",
-            "═" * 70,
             ""
         ]
         for i, seg in enumerate(segments, 1):
-            dur_sec = (seg.end_f - seg.start_f) / FPS
             dur_min = dur_sec / 60.0
-            line = f"🎬 Corte {i}:"
-            line += f"\n   ⏱️  {seg.start_tc} → {seg.end_tc} ({dur_min:.2f} min / {dur_sec:.0f}s)"
             if seg.text and len(seg.text.strip()) > 10:
                 text_preview = seg.text[:200].strip()
                 if len(seg.text) > 200:
                     text_preview += "..."
-                line += f"\n   💬 {text_preview}"
             summary_lines.append(line)
             summary_lines.append("")
         if debug_info:
-            summary_lines.append("═" * 70)
-            summary_lines.append("🔍 Log do Processamento:")
-            summary_lines.extend(f"   {info}" for info in debug_info)
         summary = "\n".join(summary_lines)
-        status = f"✅ Sucesso | {mode} | {total_min:.1f} min | LLM: {'✓' if LLM_AVAILABLE else '✗'}"
-        progress(1.0, desc="✅ Concluído!")
-        return summary, output, status
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
         print(error_trace)
-        error_msg = f"❌ Erro: {str(e)}\n\n🔍 Detalhes:\n{error_trace[:800]}"
-        return error_msg, None, f"LLM: {'✓' if LLM_AVAILABLE else '✗'}"
 # =========================
 # Interface Gradio
 # =========================
 with gr.Blocks(theme=gr.themes.Soft(), title="Editor XML Premiere - IA") as demo:
-    gr.Markdown("# 🎬 Editor XML Premiere - IA Completa (Gemini)")
-    gr.Markdown("Sistema que **REALMENTE ENTENDE** seu comando usando análise completa com IA.")
-    status_inicial = f"{'🟢 IA Gemini Ativa - Análise Completa Habilitada' if LLM_AVAILABLE else '🔴 IA Desabilitada - Configure GEMINI_API_KEY para análise inteligente'}"
-    gr.Markdown(f"**Status:** {status_inicial}")
-    if LLM_AVAILABLE:
-        gr.Markdown("""
-        ### 🚀 Como funciona a IA:
-        1. **Você descreve** o que quer em linguagem natural
-        2. **IA analisa** toda a transcrição (pode levar 30-60s)
-        3. **IA identifica** os trechos exatos que correspondem ao seu pedido
-        4. **Sistema cria** os cortes precisos automaticamente
-        ⚡ **Mais lento, mas MUITO mais preciso!**
-        """)
-    else:
-        gr.Markdown("""
-        ### ⚠️ IA Desabilitada
-        Configure a variável de ambiente `GEMINI_API_KEY` para ativar análise inteligente.
-        No modo básico, apenas comandos simples e timecodes manuais funcionam bem.
-        """)
     with gr.Row():
-        xml_in = gr.File(label="📄 XML do Premiere", file_types=[".xml"])
-        txt_in = gr.File(label="📝 Transcrição (.txt) - OBRIGATÓRIA para IA", file_types=[".txt"])
     with gr.Row():
         use_llm = gr.Checkbox(
-            label="🤖 Usar IA Gemini (análise completa - RECOMENDADO)",
             value=USE_LLM_DEFAULT and LLM_AVAILABLE,
-            interactive=LLM_AVAILABLE,
-            info="Quando ativo, a IA lê TODA a transcrição e encontra os melhores trechos"
         )
-        num_segments = gr.Slider(2, 20, 5, 1, label="📊 Segmentos (apenas modo automático)")
-    with gr.Accordion("💬 Comando em Linguagem Natural (MODO PRINCIPAL)", open=True):
-        gr.Markdown("""
-        ### ✨ Exemplos de comandos que a IA entende:
-        **📌 Simples:**
-        - "Crie 3 cortes de 30 segundos sobre futebol"
-        - "Quero 2 clipes de 1 minuto falando sobre Maria"
-        - "Faça 5 cortes de 45s sobre o tema educação"
-        **🎯 Específicos:**
-        - "1 corte de 10 minutos da parte onde ele fala sobre a infância"
-        - "3 cortes de 30s sobre os momentos engraçados"
-        - "2 clipes de 1min sobre superação e disciplina"
-        **📍 Com timecode:**
-        - "Corte de 5 minutos começando em 00:02:00:00 sobre tecnologia"
-        - "3 cortes de 45s a partir de 00:10:00 falando sobre amor"
-        **🔍 Busca temática:**
-        - "Os melhores momentos sobre família, cada um com 40s"
-        - "Trechos emocionantes de 1 minuto cada"
-        - "Partes onde menciona desafios e conquistas"
-        ### 💡 Dicas para melhores resultados:
-        - ✅ Seja específico sobre o tema/assunto
-        - ✅ Especifique duração e quantidade
-        - ✅ Use a transcrição completa
-        - ✅ Deixe a IA trabalhar (30-60s de análise)
-        - ❌ Evite comandos vagos como "faça algo legal"
-        """)
         natural_instructions = gr.Textbox(
-            label="Digite seu comando aqui",
-            placeholder='Ex: "Crie 3 cortes de 45 segundos sobre os momentos onde ele fala de disciplina e superação"',
             lines=4
         )
-    with gr.Accordion("🎯 Minutagens Manuais (precisão total)", open=False):
-        gr.Markdown("Use quando souber exatamente os timecodes. Ignora IA e outros modos.")
         manual_timecodes = gr.Textbox(
             label="Timecodes (um por linha)",
             placeholder="00:21:18:09 - 00:31:18:09\n00:45:20:15 - 00:50:10:22",
             lines=4
         )
-    with gr.Accordion("⚙️ Modo Automático (sem comando)", open=False):
-        gr.Markdown("Sistema de pontuação simples. **Não recomendado** - use comandos em linguagem natural.")
         custom_keywords = gr.Textbox(
             label="Palavras-chave (separadas por vírgula)",
             placeholder="coragem, superação, vitória"
         )
         with gr.Row():
-            weight_emotion = gr.Slider(0, 5, 2.0, 0.1, label="⚡ Peso: emoção")
-            weight_break = gr.Slider(0, 5, 1.5, 0.1, label="💥 Peso: quebra")
         with gr.Row():
-            weight_learn = gr.Slider(0, 5, 1.2, 0.1, label="🎓 Peso: aprendizado")
-            weight_viral = gr.Slider(0, 5, 1.0, 0.1, label="🔥 Peso: viral")
-    btn = gr.Button("🚀 Processar com IA (pode levar 30-60s)", variant="primary", size="lg")
     with gr.Row():
         with gr.Column(scale=2):
-            summary_out = gr.Textbox(label="📋 Resumo dos Cortes", lines=20, max_lines=30)
         with gr.Column(scale=1):
-            status_out = gr.Textbox(label="📊 Status", lines=3)
-            file_out = gr.File(label="⬇️ Download XML Editado")
     btn.click(
         process_files,
         [xml_in, txt_in, use_llm, num_segments, custom_keywords,
          manual_timecodes, natural_instructions,
-         weight_emotion, weight_break, weight_learn, weight_viral],
         [summary_out, file_out, status_out]
     )
-    gr.Markdown("""
----
-### 📚 Guia Rápido:
-**🎯 Para melhores resultados:**
-1. ✅ Envie XML + Transcrição completa
-2. ✅ Ative a IA (checkbox)
-3. ✅ Escreva comando claro e específico
-4. ✅ Aguarde 30-60s para análise completa
-5. ✅ Baixe e importe no Premiere
-**⚡ Ordem de prioridade:**
-1. **Minutagens Manuais** (ignora tudo, máxima precisão)
-2. **Comando + IA** (análise completa, muito preciso)
-3. **Comando sem IA** (básico, menos preciso)
-4. **Modo Automático** (não recomendado)
-**🔧 Troubleshooting:**
-- Erro "IA não disponível": Configure `GEMINI_API_KEY`
-- Cortes errados: Seja mais específico no comando
-- Demora muito: Normal para IA completa (30-60s)
-- Sem transcrição: Use minutagens manuais
-    """)
 if __name__ == "__main__":
-    demo.launch()

 import json
 import xml.etree.ElementTree as ET
 from dataclasses import dataclass
+from typing import List, Tuple, Optional, Callable
 import gradio as gr
 # =========================
 # Configurações Gerais
 # =========================
 OUTPUT_DIR = "./Output"
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 # =========================
 # Funções de Timecode
 # =========================
+def _tc_to_hmsf(tc: str, fps: int) -> Tuple[int, int, int, int]:
     """Converte timecode para (hh, mm, ss, ff)."""
     s = tc.strip()
     raise ValueError(f"Timecode inválido: {tc}")
+def parse_timecode_to_frames(tc: str, fps: int) -> int:
     hh, mm, ss, ff = _tc_to_hmsf(tc, fps)
     return hh * 3600 * fps + mm * 60 * fps + ss * fps + ff
+def frames_to_timecode(frames: int, fps: int) -> str:
     hh = frames // (3600 * fps)
     rem = frames % (3600 * fps)
     mm = rem // (60 * fps)
 # =========================
 # Parser de Transcrição
 # =========================
+def parse_transcript(txt: str, fps: int) -> List[Segment]:
+    """Parser robusto para múltiplos formatos (intervalos e WEBVTT/SRT)."""
     if not txt or not txt.strip():
         return []
             text = " ".join(text_parts).strip()
             try:
+                sf = parse_timecode_to_frames(start_tc, fps)
+                ef = parse_timecode_to_frames(end_tc, fps)
                 if ef > sf:
                     results.append(Segment(
+                        start_tc=frames_to_timecode(sf, fps),
+                        end_tc=frames_to_timecode(ef, fps),
                         start_f=sf,
                         end_f=ef,
                         text=text if text else f"{start_tc} - {end_tc}",
             i += 1
             continue
+        # Bloco estilo VTT/SRT: "00:00:01,000 --> 00:00:03,000"
         if arrow.search(raw) or (i + 1 < len(lines) and arrow.search(lines[i + 1])):
             line_with_tc = raw if arrow.search(raw) else lines[i + 1]
             mm = arrow.search(line_with_tc)
                 text = " ".join(text_parts).strip()
                 try:
+                    sf = parse_timecode_to_frames(start_tc, fps)
+                    ef = parse_timecode_to_frames(end_tc, fps)
                     if ef > sf:
                         results.append(Segment(
+                            start_tc=frames_to_timecode(sf, fps),
+                            end_tc=frames_to_timecode(ef, fps),
                             start_f=sf,
                             end_f=ef,
                             text=text,
     manual_ranges = []
     lines = manual_input.replace(",", "\n").splitlines()
+    pattern = re.compile(
+        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*[-–—]\s*'
+        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)'
+    )
     for line in lines:
         m = pattern.search(line.strip())
         if m:
 # =========================
 # IA: Análise Inteligente com Gemini
 # =========================
+def ai_analyze_and_select(
+    segments: List[Segment],
+    command: str,
+    fps: int,
+    progress_callback: Optional[Callable[[str], None]] = None
+) -> List[Segment]:
     """
     Usa Gemini para analisar a transcrição completa e identificar os melhores trechos.
     Processo em 2 etapas para máxima precisão.
     """
     if not LLM_AVAILABLE or not segments:
         raise ValueError("IA não disponível ou sem segmentos para analisar")
     if progress_callback:
+        progress_callback("Etapa 1/3: preparando dados para análise...")
     # Prepara a transcrição completa com índices
     transcript_data = []
     for i, seg in enumerate(segments):
+        duration_sec = max(0, (seg.end_f - seg.start_f) / fps)
         transcript_data.append({
             "index": i,
             "timecode": seg.start_tc,
             "duration_sec": round(duration_sec, 1),
+            "text": (seg.text or "")[:200]  # Limita texto para não estourar tokens
         })
     transcript_json = json.dumps(transcript_data, ensure_ascii=False, indent=2)
     if progress_callback:
+        progress_callback(f"Etapa 2/3: analisando {len(segments)} segmentos com IA...")
     prompt = f"""Você é um especialista em edição de vídeo. Analise a transcrição e identifique os MELHORES trechos baseado no comando do usuário.
 COMANDO DO USUÁRIO:
     {{
       "start_index": <índice do segmento inicial>,
       "duration_seconds": <duração desejada em segundos>,
+      "reason": "<breve explicação>"
     }}
   ]
 }}
 IMPORTANTE:
+- Seja preciso na identificação dos trechos
+- Considere o contexto completo
 - Se houver timecode, priorize começar próximo a ele
+- Responda apenas com o JSON
+"""
     try:
         response = LLM.generate_content(
                 "max_output_tokens": 2000,
             }
         )
+        response_text = (response.text or "").strip()
         if progress_callback:
+            progress_callback("Etapa 3/3: processando resposta da IA...")
         json_match = re.search(r'\{[\s\S]*"cuts"[\s\S]*\}', response_text)
         if not json_match:
             raise ValueError("IA não retornou JSON válido")
         result = json.loads(json_match.group(0))
         cuts_data = result.get("cuts", [])
         if not cuts_data:
             raise ValueError("IA não encontrou cortes adequados")
+        selected_segments: List[Segment] = []
         for cut_info in cuts_data:
+            start_idx = int(cut_info.get("start_index", 0))
+            duration_sec = int(cut_info.get("duration_seconds", 60))
+            reason = str(cut_info.get("reason", "")).strip()
             if start_idx < 0 or start_idx >= len(segments):
                 continue
             start_seg = segments[start_idx]
             start_frame = start_seg.start_f
+            duration_frames = max(0, int(duration_sec * fps))
             end_frame = start_frame + duration_frames
             # Coleta texto dos segmentos envolvidos
+            text_parts = [f"[IA] {reason}"] if reason else []
             for seg in segments[start_idx:]:
                 if seg.start_f < end_frame:
                     if seg.text:
                         text_parts.append(seg.text[:150])
                 else:
                     break
             combined_text = " [...] ".join(text_parts)[:500]
             selected_segments.append(Segment(
+                start_tc=frames_to_timecode(start_frame, fps),
+                end_tc=frames_to_timecode(end_frame, fps),
                 start_f=start_frame,
                 end_f=end_frame,
                 text=combined_text,
                 score=100.0
             ))
         return selected_segments
     except json.JSONDecodeError as e:
+        raise ValueError(f"Erro ao processar resposta da IA (JSON inválido): {str(e)}")
     except Exception as e:
         raise ValueError(f"Erro na análise da IA: {str(e)}")
 # =========================
 # Processamento com Comando Manual (sem IA)
 # =========================
+def manual_command_processing(segments: List[Segment], command: str, fps: int) -> List[Segment]:
+    """Fallback: processamento básico sem IA para comandos simples."""
+    s = (command or "").lower()
+    # quantidade
     count = 1
     m = re.search(r'(\d+)\s*(?:cortes?|clipes?|segmentos?)', s)
     if m:
         count = int(m.group(1))
+    # duração
     duration_sec = 60
     m = re.search(r'(\d+)\s*(?:segundos?|s\b)', s)
     if m:
         m = re.search(r'(\d+)\s*(?:minutos?|min\b)', s)
         if m:
             duration_sec = int(m.group(1)) * 60
+    # timecode inicial
     start_frame = 0
     m = re.search(r'(?:começando|a partir de)\s+(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)', s)
     if m:
         try:
+            start_frame = parse_timecode_to_frames(m.group(1), fps)
+        except Exception:
             pass
+    # cortes contínuos
     results = []
     base_frame = start_frame
     for i in range(count):
+        duration_frames = duration_sec * fps
         end_frame = base_frame + duration_frames
         text_parts = []
         for seg in segments:
             if seg.start_f >= base_frame and seg.start_f < end_frame:
                 if seg.text:
                     text_parts.append(seg.text[:100])
         combined_text = " [...] ".join(text_parts[:10])[:400]
         results.append(Segment(
+            start_tc=frames_to_timecode(base_frame, fps),
+            end_tc=frames_to_timecode(end_frame, fps),
             start_f=base_frame,
             end_f=end_frame,
             text=combined_text if combined_text else f"Corte {i+1}",
             score=50.0
         ))
         base_frame = end_frame
     return results
     weight_learn: float,
     weight_viral: float
 ) -> List[Segment]:
+    """Sistema de pontuação automática simples por palavras-chave."""
     emotion_words = ['medo', 'coragem', 'amor', 'ódio', 'paixão', 'alegria', 'tristeza']
     break_words = ['nunca', 'de repente', 'surpreendente', 'inesperado', 'incrível']
     learn_words = ['aprendi', 'descobri', 'entendi', 'percebi', 'lição']
     viral_words = ['segredo', 'verdade', 'revelação', 'exclusivo', 'confissão']
     for s in segs:
         score = 0.0
         text = (s.text or "").lower()
         for word in emotion_words:
             if word in text:
                 score += weight_emotion
         for word in break_words:
             if word in text:
                 score += weight_break
         for word in learn_words:
             if word in text:
                 score += weight_learn
         for word in viral_words:
             if word in text:
                 score += weight_viral
         if custom_keywords:
             for kw in custom_keywords.split(","):
                 kw_clean = kw.strip().lower()
                 if kw_clean and kw_clean in text:
                     score += 5.0
         s.score = score
     segs.sort(key=lambda x: x.score, reverse=True)
+    return segs[:max(1, num_segments)]
 # =========================
     weight_break: float,
     weight_learn: float,
     weight_viral: float,
+    fps: int,
+    progress_callback: Optional[Callable[[str], None]] = None
 ) -> List[Segment]:
     # 1) Manual
         for start_tc, end_tc in manual:
             try:
                 result.append(Segment(
+                    start_tc=frames_to_timecode(parse_timecode_to_frames(start_tc, fps), fps),
+                    end_tc=frames_to_timecode(parse_timecode_to_frames(end_tc, fps), fps),
+                    start_f=parse_timecode_to_frames(start_tc, fps),
+                    end_f=parse_timecode_to_frames(end_tc, fps),
                     text=f"Manual: {start_tc} - {end_tc}",
                     score=100.0
                 ))
         return result
     # 2) Parser de transcrição
+    segs = parse_transcript(transcript_txt, fps) if transcript_txt else []
+    # 3) Linguagem natural
     if natural_instructions.strip():
         if use_llm and LLM_AVAILABLE and segs:
+            return ai_analyze_and_select(segs, natural_instructions, fps, progress_callback)
         elif segs:
+            return manual_command_processing(segs, natural_instructions, fps)
         else:
+            raise ValueError(
+                "Para usar comandos em linguagem natural, forneça uma transcrição "
+                "ou use minutagens manuais."
+            )
     # 4) Automático
     if not segs:
+        raise ValueError("Nenhum segmento encontrado. Envie transcrição, minutagens ou um comando em linguagem natural.")
     return auto_score_segments(
         segs, num_segments, custom_keywords,
         weight_emotion, weight_break, weight_learn, weight_viral
     xml_file, txt_file, use_llm, num_segments,
     custom_keywords, manual_timecodes, natural_instructions,
     weight_emotion, weight_break, weight_learn, weight_viral,
+    fps,
     progress=gr.Progress()
 ):
     if not xml_file:
+        return "Envie o XML do Premiere", None, f"LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
     try:
         debug_info = []
         def progress_callback(msg):
             progress(0.5, desc=msg)
             debug_info.append(msg)
+        progress(0.1, desc="Carregando arquivos...")
         transcript = ""
         manual = parse_manual_timecodes(manual_timecodes)
         if not manual and txt_file:
             with open(txt_file.name, "r", encoding="utf-8-sig") as f:
                 transcript = f.read()
+            debug_info.append(f"Transcrição: {len(transcript)} caracteres")
+        progress(0.2, desc="Selecionando segmentos...")
         segments = select_segments(
+            transcript, bool(use_llm) and LLM_AVAILABLE, int(num_segments),
             custom_keywords, manual_timecodes, natural_instructions,
+            float(weight_emotion), float(weight_break), float(weight_learn), float(weight_viral),
+            int(fps),
             progress_callback
         )
         if not segments:
+            return "Nenhum segmento selecionado", None, f"LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
+        # Validar duração mínima: pelo menos 1 segundo
         valid_segments = []
         for seg in segments:
+            if seg.end_f > seg.start_f and (seg.end_f - seg.start_f) >= max(1, int(fps)):
                 valid_segments.append(seg)
         if not valid_segments:
+            return "Segmentos inválidos (duração muito curta)", None, f"LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
         segments = valid_segments
+        debug_info.append(f"{len(segments)} segmento(s) válidos")
+        progress(0.7, desc="Editando XML...")
         tree = ET.parse(xml_file.name)
         tree = edit_xml(tree, segments)
         basename = os.path.splitext(os.path.basename(xml_file.name))[0]
+        output_path = os.path.join(OUTPUT_DIR, f"{basename}_EDITADO.xml")
+        tree.write(output_path, encoding="utf-8", xml_declaration=True)
+        progress(0.9, desc="Gerando resumo...")
+        total_sec = sum((s.end_f - s.start_f) / fps for s in segments)
         total_min = total_sec / 60.0
         if manual:
+            mode = "Manual"
         elif natural_instructions.strip() and use_llm and LLM_AVAILABLE:
+            mode = "IA Completa (Gemini)"
         elif natural_instructions.strip():
+            mode = "Básico (sem IA)"
         else:
+            mode = "Automático"
         summary_lines = [
+            "RESULTADO",
+            f"- Cortes: {len(segments)}",
+            f"- Duração total: {total_min:.1f} min",
+            f"- Modo: {mode}",
             ""
         ]
         for i, seg in enumerate(segments, 1):
+            dur_sec = (seg.end_f - seg.start_f) / fps
             dur_min = dur_sec / 60.0
+            line = f"Corte {i}\n  {seg.start_tc} -> {seg.end_tc}  ({dur_min:.2f} min / {dur_sec:.0f}s)"
             if seg.text and len(seg.text.strip()) > 10:
                 text_preview = seg.text[:200].strip()
                 if len(seg.text) > 200:
                     text_preview += "..."
+                line += f"\n  {text_preview}"
             summary_lines.append(line)
             summary_lines.append("")
         if debug_info:
+            summary_lines.append("Log do processamento:")
+            summary_lines.extend(f"- {info}" for info in debug_info)
         summary = "\n".join(summary_lines)
+        status = f"Sucesso | {mode} | {total_min:.1f} min | LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
+        progress(1.0, desc="Concluído")
+        return summary, output_path, status
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
         print(error_trace)
+        error_msg = f"Erro: {str(e)}\n\nDetalhes:\n{error_trace[:800]}"
+        return error_msg, None, f"LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
 # =========================
 # Interface Gradio
 # =========================
 with gr.Blocks(theme=gr.themes.Soft(), title="Editor XML Premiere - IA") as demo:
+    gr.Markdown("# Editor XML Premiere - IA Completa (Gemini)")
+    status_inicial = f"{'IA Gemini ativa' if LLM_AVAILABLE else 'IA desabilitada: configure GEMINI_API_KEY'}"
+    gr.Markdown(f"Status: {status_inicial}")
     with gr.Row():
+        xml_in = gr.File(label="XML do Premiere", file_types=[".xml"])
+        txt_in = gr.File(label="Transcrição (.txt) — obrigatória para IA", file_types=[".txt"])
     with gr.Row():
         use_llm = gr.Checkbox(
+            label="Usar IA Gemini (análise completa — recomendado)",
             value=USE_LLM_DEFAULT and LLM_AVAILABLE,
+            interactive=LLM_AVAILABLE
+        )
+        num_segments = gr.Slider(2, 20, 5, 1, label="Quantidade de segmentos (modo automático)")
+    fps_in = gr.Slider(12, 60, 24, 1, label="FPS")
+    with gr.Accordion("Comando em linguagem natural (modo principal)", open=True):
+        gr.Markdown(
+            "Exemplos: \n"
+            '- "Crie 3 cortes de 30 segundos sobre disciplina"\n'
+            '- "2 clipes de 1 minuto falando sobre Maria"\n'
+            '- "Corte de 5 minutos começando em 00:02:00:00 sobre tecnologia"'
         )
         natural_instructions = gr.Textbox(
+            label="Digite seu comando",
+            placeholder='Ex: "Crie 3 cortes de 45 segundos sobre os momentos de disciplina e superação"',
             lines=4
         )
+    with gr.Accordion("Minutagens manuais (precisão total)", open=False):
+        gr.Markdown("Ignora IA e outros modos.")
         manual_timecodes = gr.Textbox(
             label="Timecodes (um por linha)",
             placeholder="00:21:18:09 - 00:31:18:09\n00:45:20:15 - 00:50:10:22",
             lines=4
         )
+    with gr.Accordion("Modo automático (sem comando)", open=False):
+        gr.Markdown("Sistema de pontuação simples por palavras-chave.")
         custom_keywords = gr.Textbox(
             label="Palavras-chave (separadas por vírgula)",
             placeholder="coragem, superação, vitória"
         )
         with gr.Row():
+            weight_emotion = gr.Slider(0, 5, 2.0, 0.1, label="Peso: emoção")
+            weight_break = gr.Slider(0, 5, 1.5, 0.1, label="Peso: quebra")
         with gr.Row():
+            weight_learn = gr.Slider(0, 5, 1.2, 0.1, label="Peso: aprendizado")
+            weight_viral = gr.Slider(0, 5, 1.0, 0.1, label="Peso: viral")
+    btn = gr.Button("Processar")
     with gr.Row():
         with gr.Column(scale=2):
+            summary_out = gr.Textbox(label="Resumo dos cortes", lines=20, max_lines=30)
         with gr.Column(scale=1):
+            status_out = gr.Textbox(label="Status", lines=3)
+            file_out = gr.File(label="Download XML editado")
     btn.click(
         process_files,
         [xml_in, txt_in, use_llm, num_segments, custom_keywords,
          manual_timecodes, natural_instructions,
+         weight_emotion, weight_break, weight_learn, weight_viral, fps_in],
         [summary_out, file_out, status_out]
     )
 if __name__ == "__main__":
+    demo.launch()