Spaces:

leicam
/

EditorAutomaticoXML

Sleeping

App Files Files Community

leicam committed on Oct 7, 2025

Commit

0e92a90

verified ·

1 Parent(s): 047f823

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -59

app.py CHANGED Viewed

@@ -17,11 +17,14 @@ try:
         genai.configure(api_key=GEMINI_API_KEY)
         LLM = genai.GenerativeModel(LLM_MODEL_NAME)
         LLM_AVAILABLE = True
     else:
         LLM = None
-except Exception:
     LLM = None
     LLM_AVAILABLE = False
 # Config
 FPS = 24
@@ -66,20 +69,13 @@ def parse_transcript_full(txt: str) -> List[Segment]:
     lines = txt.splitlines()
     results: List[Segment] = []
-    # Regex flexível: aceita vários formatos
-    # [00:00:00:00 - 00:00:10:00] Texto
-    # 00:00:00:00 - 00:00:10:00 Texto
-    # 00:00:00:00 — 00:00:10:00 Texto
     pattern = re.compile(
         r'^\s*\[?\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*[-—–]\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*\]?\s*(.*)$'
     )
-    current_text_buffer = []
     for line in lines:
         line = line.strip()
-        # Pula linhas vazias ou apenas "Desconhecido"
         if not line or line == "Desconhecido":
             continue
@@ -89,7 +85,6 @@ def parse_transcript_full(txt: str) -> List[Segment]:
             start_tc, end_tc, text = match.groups()
             text = text.strip()
-            # Se não tem texto na mesma linha, pega da próxima
             if not text or text == "Desconhecido":
                 continue
@@ -107,15 +102,10 @@ def parse_transcript_full(txt: str) -> List[Segment]:
                         score=0.0
                     ))
             except Exception as e:
-                print(f"⚠ Erro ao processar: {line[:60]}... -> {e}")
                 continue
     print(f"✓ Encontrados {len(results)} segmentos na transcrição")
-    if not results:
-        print("⚠ AVISO: Nenhum segmento válido encontrado!")
-        print("Formato esperado: 00:00:00:00 - 00:00:10:00 Texto")
     return results
 # ============ MANUAL TIMECODES ============
@@ -156,12 +146,12 @@ def extract_duration_and_keywords(instructions: str) -> Tuple[Optional[float], L
         match = re.search(pattern, instructions_lower)
         if match:
             duration = float(match.group(1))
             break
     # Extrai palavras-chave importantes
     keywords = []
-    # Busca por tópicos específicos
     topic_keywords = {
         'tenista': ['tenista', 'tênis', 'jogador', 'kinguios'],
         'maria': ['maria', 'josé', 'casal', 'seguro', 'carro'],
@@ -174,6 +164,7 @@ def extract_duration_and_keywords(instructions: str) -> Tuple[Optional[float], L
         if any(term in instructions_lower for term in terms):
             keywords.append(key)
     return duration, keywords
 def find_segment_by_content(segs: List[Segment], keywords: List[str]) -> int:
@@ -192,11 +183,13 @@ def find_segment_by_content(segs: List[Segment], keywords: List[str]) -> int:
             best_score = score
             best_idx = idx
     return best_idx
 def ai_find_start_point(segs: List[Segment], instructions: str, keywords: List[str]) -> int:
     """Usa IA para encontrar ponto de início"""
     if not LLM_AVAILABLE:
         return find_segment_by_content(segs, keywords)
     # Cria resumo dos primeiros 150 segmentos
@@ -204,7 +197,7 @@ def ai_find_start_point(segs: List[Segment], instructions: str, keywords: List[s
     for i, s in enumerate(segs[:150]):
         duration = (s.end_f - s.start_f) / FPS
         segments_preview.append(
-            f"{i}. [{s.start_tc}] ({duration:.1f}s) {s.text[:100]}"
         )
     prompt = f"""Você é um editor de vídeo. Encontre o índice do segmento onde deve COMEÇAR o corte.
@@ -218,18 +211,20 @@ SEGMENTOS DISPONÍVEIS:
 IMPORTANTE:
 - Analise onde está o conteúdo solicitado
 - Retorne APENAS o número do índice (exemplo: 87)
-- Se mencionar "tenista", procure por "tenista", "tênis", "Kinguios"
-- Se mencionar "Maria", procure por "Maria", "José", "carro"
 RESPONDA APENAS COM O NÚMERO:"""
     try:
         response = LLM.generate_content(prompt, generation_config={
             "temperature": 0.1,
             "max_output_tokens": 50
         })
         text = (response.text or "").strip()
         match = re.search(r'\b(\d+)\b', text)
         if match:
@@ -239,11 +234,11 @@ RESPONDA APENAS COM O NÚMERO:"""
                 return idx
     except Exception as e:
-        print(f"⚠ Erro na IA, usando busca por palavras-chave: {e}")
-    # Fallback: busca por palavras-chave
     fallback_idx = find_segment_by_content(segs, keywords)
-    print(f"✓ Usando busca por palavras-chave, início no segmento {fallback_idx}")
     return fallback_idx
 def create_continuous_cut(segs: List[Segment], start_idx: int, duration_minutes: float) -> List[Segment]:
@@ -257,7 +252,14 @@ def create_continuous_cut(segs: List[Segment], start_idx: int, duration_minutes:
     start_frame = start_seg.start_f
     end_frame = start_frame + target_frames
-    # Cria texto combinado dos segmentos envolvidos
     involved_segs = []
     for seg in segs[start_idx:]:
         if seg.start_f < end_frame:
@@ -276,6 +278,7 @@ def create_continuous_cut(segs: List[Segment], start_idx: int, duration_minutes:
         score=100.0
     )
     return [result]
 def ai_select_segments(segs: List[Segment], instructions: str) -> List[Segment]:
@@ -283,26 +286,22 @@ def ai_select_segments(segs: List[Segment], instructions: str) -> List[Segment]:
     if not segs:
         raise ValueError("Nenhum segmento disponível")
     # Extrai duração e palavras-chave
     duration, keywords = extract_duration_and_keywords(instructions)
-    print(f"Instruções analisadas - Duração: {duration}min, Keywords: {keywords}")
     if duration:
         # Modo: corte contínuo de X minutos
         start_idx = ai_find_start_point(segs, instructions, keywords)
         result = create_continuous_cut(segs, start_idx, duration)
-        print(f"✓ Corte criado: {result[0].start_tc} → {result[0].end_tc} ({duration}min)")
         return result
     else:
-        # Modo: seleção múltipla de trechos
         print("⚠ Duração não especificada, usando modo de seleção múltipla")
         start_idx = ai_find_start_point(segs, instructions, keywords)
-        # Retorna 10 segmentos a partir do ponto encontrado
         selected = segs[start_idx:start_idx + 10]
         if not selected:
@@ -352,10 +351,14 @@ def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
                    weight_learn: float, weight_viral: float) -> List[Segment]:
     """Função principal de seleção"""
     # Prioridade 1: Timecodes manuais
     manual_ranges = parse_manual_timecodes(manual_timecodes)
     if manual_ranges:
-        print(f"Modo: MANUAL - {len(manual_ranges)} ranges")
         result_segs = []
         for start_tc, end_tc in manual_ranges:
             try:
@@ -378,15 +381,19 @@ def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
     segs = parse_transcript_full(transcript_txt)
     if not segs:
-        raise ValueError("Nenhum segmento válido encontrado na transcrição. Verifique o formato: 00:00:00:00 - 00:00:10:00 Texto")
-    # Prioridade 2: Instruções em linguagem natural com IA
-    if natural_instructions.strip() and use_llm and LLM_AVAILABLE:
-        print(f"Modo: IA com linguagem natural")
         return ai_select_segments(segs, natural_instructions)
     # Prioridade 3: Modo automático com pontuação
-    print(f"Modo: AUTOMÁTICO por pontuação")
     weights = {
         "emotion": weight_emotion,
         "break": weight_break,
@@ -421,7 +428,6 @@ def edit_sequence_with_segments(tree: ET.ElementTree, segs: List[Segment]) -> ET
     if v_tpl is None or a_tpl is None:
         raise ValueError("Clipitem template não encontrado")
-    # Copia estrutura do template
     def deep_copy(elem):
         new = ET.Element(elem.tag, attrib=elem.attrib)
         new.text = elem.text
@@ -447,7 +453,7 @@ def edit_sequence_with_segments(tree: ET.ElementTree, segs: List[Segment]) -> ET
         v_id = f"clip-v-{idx}"
         a_id = f"clip-a-{idx}"
-        # Cria video clip
         v_ci = ET.Element("clipitem", {"id": v_id})
         v_name = ET.SubElement(v_ci, "name")
         v_name.text = f"Clip {idx}"
@@ -467,7 +473,7 @@ def edit_sequence_with_segments(tree: ET.ElementTree, segs: List[Segment]) -> ET
         v_link = ET.SubElement(v_ci, "link")
         ET.SubElement(v_link, "linkclipref").text = a_id
-        # Cria audio clip
         a_ci = ET.Element("clipitem", {"id": a_id})
         a_name = ET.SubElement(a_ci, "name")
         a_name.text = f"Clip {idx}"
@@ -501,7 +507,7 @@ def process_xml_and_transcript(xml_file, txt_file, use_llm, num_segments,
     """Processa XML e transcrição"""
     if not xml_file:
-        return "❌ Envie o arquivo XML do Premiere", None, f"LLM: {LLM_AVAILABLE}"
     manual_ranges = parse_manual_timecodes(manual_timecodes)
     has_instructions = natural_instructions.strip() != ""
@@ -511,18 +517,16 @@ def process_xml_and_transcript(xml_file, txt_file, use_llm, num_segments,
         mode = "MANUAL"
         transcript = ""
     elif has_instructions:
-        mode = "IA (Linguagem Natural)"
         if not txt_file:
-            return "❌ Para usar IA, envie a transcrição (.txt)", None, f"LLM: {LLM_AVAILABLE}"
-        if not LLM_AVAILABLE:
-            return "❌ IA não disponível. Configure GEMINI_API_KEY nas variáveis de ambiente", None, "LLM: False"
         with open(txt_file.name, "r", encoding="utf-8") as f:
             transcript = f.read()
     else:
         mode = "AUTOMÁTICO"
         if not txt_file:
-            return "❌ Envie a transcrição (.txt)", None, f"LLM: {LLM_AVAILABLE}"
         with open(txt_file.name, "r", encoding="utf-8") as f:
             transcript = f.read()
@@ -536,7 +540,7 @@ def process_xml_and_transcript(xml_file, txt_file, use_llm, num_segments,
         )
         if not segs:
-            return "❌ Nenhum segmento foi selecionado", None, f"LLM: {LLM_AVAILABLE}"
         # Edita XML
         tree = ET.parse(xml_file.name)
@@ -550,7 +554,7 @@ def process_xml_and_transcript(xml_file, txt_file, use_llm, num_segments,
         # Gera resumo
         total_duration = sum((s.end_f - s.start_f) / FPS for s in segs)
-        resumo = f"✂️ {len(segs)} corte(s) | Duração total: {total_duration/60:.1f} min | Modo: {mode}\n\n"
         for i, s in enumerate(segs, 1):
             dur = (s.end_f - s.start_f) / FPS
@@ -559,15 +563,17 @@ def process_xml_and_transcript(xml_file, txt_file, use_llm, num_segments,
                 resumo += f"   {s.text[:150]}\n"
             resumo += "\n"
-        status = f"✓ Sucesso! | Modo: {mode} | Duração: {total_duration/60:.1f} min | LLM: {LLM_AVAILABLE}"
         return resumo, out_path, status
     except Exception as e:
         import traceback
         error_detail = traceback.format_exc()
-        print(f"ERRO COMPLETO:\n{error_detail}")
-        return f"❌ Erro: {str(e)}", None, f"LLM: {LLM_AVAILABLE}"
 # ============ CSS & GRADIO APP ============
 css = """
@@ -596,7 +602,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Editor XML Premiere") as
             use_llm = gr.Checkbox(
                 label="🤖 Usar IA (Gemini)",
                 value=USE_LLM_DEFAULT and LLM_AVAILABLE,
-                info="Requer GEMINI_API_KEY configurada"
             )
             num_segments = gr.Slider(
                 2, 20, 5, step=1,
@@ -610,6 +616,8 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Editor XML Premiere") as
 - `Crie um corte de 15 minutos com os melhores momentos`
 - `Faça um corte de 5 minutos sobre Maria e José`
 - `Corte de 8 minutos a partir de onde fala sobre protocolo`
         """)
         natural_instructions = gr.Textbox(
             label="Suas instruções",
@@ -659,10 +667,4 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Editor XML Premiere") as
     gr.Markdown("""
     ---
     **💡 Dicas:**
-    - Formato da transcrição: `00:00:00:00 - 00:00:10:00 Texto aqui`
-    - Para cortes contínuos, especifique a duração (ex: "10 minutos")
-    - Use palavras-chave específicas do conteúdo para melhor precisão
-    """)
-if __name__ == "__main__":
-    demo.launch()

         genai.configure(api_key=GEMINI_API_KEY)
         LLM = genai.GenerativeModel(LLM_MODEL_NAME)
         LLM_AVAILABLE = True
+        print("✓ IA Gemini configurada com sucesso")
     else:
         LLM = None
+        print("⚠ GEMINI_API_KEY não encontrada")
+except Exception as e:
     LLM = None
     LLM_AVAILABLE = False
+    print(f"⚠ Erro ao configurar IA: {e}")
 # Config
 FPS = 24
     lines = txt.splitlines()
     results: List[Segment] = []
     pattern = re.compile(
         r'^\s*\[?\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*[-—–]\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*\]?\s*(.*)$'
     )
     for line in lines:
         line = line.strip()
         if not line or line == "Desconhecido":
             continue
             start_tc, end_tc, text = match.groups()
             text = text.strip()
             if not text or text == "Desconhecido":
                 continue
                         score=0.0
                     ))
             except Exception as e:
+                print(f"⚠ Erro ao processar linha: {e}")
                 continue
     print(f"✓ Encontrados {len(results)} segmentos na transcrição")
     return results
 # ============ MANUAL TIMECODES ============
         match = re.search(pattern, instructions_lower)
         if match:
             duration = float(match.group(1))
+            print(f"✓ Duração extraída: {duration} minutos")
             break
     # Extrai palavras-chave importantes
     keywords = []
     topic_keywords = {
         'tenista': ['tenista', 'tênis', 'jogador', 'kinguios'],
         'maria': ['maria', 'josé', 'casal', 'seguro', 'carro'],
         if any(term in instructions_lower for term in terms):
             keywords.append(key)
+    print(f"✓ Keywords encontradas: {keywords}")
     return duration, keywords
 def find_segment_by_content(segs: List[Segment], keywords: List[str]) -> int:
             best_score = score
             best_idx = idx
+    print(f"✓ Melhor match no segmento {best_idx} (score: {best_score})")
     return best_idx
 def ai_find_start_point(segs: List[Segment], instructions: str, keywords: List[str]) -> int:
     """Usa IA para encontrar ponto de início"""
     if not LLM_AVAILABLE:
+        print("⚠ IA não disponível, usando busca por keywords")
         return find_segment_by_content(segs, keywords)
     # Cria resumo dos primeiros 150 segmentos
     for i, s in enumerate(segs[:150]):
         duration = (s.end_f - s.start_f) / FPS
         segments_preview.append(
+            f"{i}. [{s.start_tc}] ({duration:.1f}s) {s.text[:80]}"
         )
     prompt = f"""Você é um editor de vídeo. Encontre o índice do segmento onde deve COMEÇAR o corte.
 IMPORTANTE:
 - Analise onde está o conteúdo solicitado
 - Retorne APENAS o número do índice (exemplo: 87)
+- Considere o contexto e o início da história relevante
 RESPONDA APENAS COM O NÚMERO:"""
     try:
+        print("🤖 Consultando IA...")
         response = LLM.generate_content(prompt, generation_config={
             "temperature": 0.1,
             "max_output_tokens": 50
         })
         text = (response.text or "").strip()
+        print(f"IA respondeu: {text}")
         match = re.search(r'\b(\d+)\b', text)
         if match:
                 return idx
     except Exception as e:
+        print(f"⚠ Erro na IA: {e}")
+    # Fallback
     fallback_idx = find_segment_by_content(segs, keywords)
+    print(f"✓ Usando fallback no segmento {fallback_idx}")
     return fallback_idx
 def create_continuous_cut(segs: List[Segment], start_idx: int, duration_minutes: float) -> List[Segment]:
     start_frame = start_seg.start_f
     end_frame = start_frame + target_frames
+    # Garante que não ultrapassa o último segmento
+    max_frame = segs[-1].end_f
+    if end_frame > max_frame:
+        end_frame = max_frame
+        actual_duration = (end_frame - start_frame) / FPS / 60
+        print(f"⚠ Ajustado para {actual_duration:.1f} min (limite da transcrição)")
+    # Cria texto combinado
     involved_segs = []
     for seg in segs[start_idx:]:
         if seg.start_f < end_frame:
         score=100.0
     )
+    print(f"✓ Corte criado: {result.start_tc} → {result.end_tc}")
     return [result]
 def ai_select_segments(segs: List[Segment], instructions: str) -> List[Segment]:
     if not segs:
         raise ValueError("Nenhum segmento disponível")
+    print(f"📝 Processando instruções: {instructions[:100]}...")
     # Extrai duração e palavras-chave
     duration, keywords = extract_duration_and_keywords(instructions)
     if duration:
         # Modo: corte contínuo de X minutos
+        print(f"Modo: CORTE CONTÍNUO de {duration} minutos")
         start_idx = ai_find_start_point(segs, instructions, keywords)
         result = create_continuous_cut(segs, start_idx, duration)
         return result
     else:
+        # Modo: seleção múltipla (fallback)
         print("⚠ Duração não especificada, usando modo de seleção múltipla")
         start_idx = ai_find_start_point(segs, instructions, keywords)
         selected = segs[start_idx:start_idx + 10]
         if not selected:
                    weight_learn: float, weight_viral: float) -> List[Segment]:
     """Função principal de seleção"""
+    print("\n" + "="*60)
+    print("INICIANDO SELEÇÃO DE SEGMENTOS")
+    print("="*60)
     # Prioridade 1: Timecodes manuais
     manual_ranges = parse_manual_timecodes(manual_timecodes)
     if manual_ranges:
+        print(f"✓ Modo: MANUAL - {len(manual_ranges)} ranges")
         result_segs = []
         for start_tc, end_tc in manual_ranges:
             try:
     segs = parse_transcript_full(transcript_txt)
     if not segs:
+        raise ValueError("Nenhum segmento válido encontrado. Formato esperado: 00:00:00:00 - 00:00:10:00 Texto")
+    # Prioridade 2: Instruções em linguagem natural
+    if natural_instructions.strip():
+        print(f"✓ Modo: LINGUAGEM NATURAL")
+        print(f"   Instruções: {natural_instructions[:100]}...")
+        print(f"   IA disponível: {LLM_AVAILABLE}")
+        # Funciona mesmo sem IA, usando keywords
         return ai_select_segments(segs, natural_instructions)
     # Prioridade 3: Modo automático com pontuação
+    print(f"✓ Modo: AUTOMÁTICO por pontuação")
     weights = {
         "emotion": weight_emotion,
         "break": weight_break,
     if v_tpl is None or a_tpl is None:
         raise ValueError("Clipitem template não encontrado")
     def deep_copy(elem):
         new = ET.Element(elem.tag, attrib=elem.attrib)
         new.text = elem.text
         v_id = f"clip-v-{idx}"
         a_id = f"clip-a-{idx}"
+        # Video clip
         v_ci = ET.Element("clipitem", {"id": v_id})
         v_name = ET.SubElement(v_ci, "name")
         v_name.text = f"Clip {idx}"
         v_link = ET.SubElement(v_ci, "link")
         ET.SubElement(v_link, "linkclipref").text = a_id
+        # Audio clip
         a_ci = ET.Element("clipitem", {"id": a_id})
         a_name = ET.SubElement(a_ci, "name")
         a_name.text = f"Clip {idx}"
     """Processa XML e transcrição"""
     if not xml_file:
+        return "❌ Envie o arquivo XML do Premiere", None, f"LLM: {'✓' if LLM_AVAILABLE else '✗'}"
     manual_ranges = parse_manual_timecodes(manual_timecodes)
     has_instructions = natural_instructions.strip() != ""
         mode = "MANUAL"
         transcript = ""
     elif has_instructions:
+        mode = "IA (Linguagem Natural)" if (use_llm and LLM_AVAILABLE) else "Linguagem Natural (sem IA)"
         if not txt_file:
+            return "❌ Para usar linguagem natural, envie a transcrição (.txt)", None, f"LLM: {'✓' if LLM_AVAILABLE else '✗'}"
         with open(txt_file.name, "r", encoding="utf-8") as f:
             transcript = f.read()
     else:
         mode = "AUTOMÁTICO"
         if not txt_file:
+            return "❌ Envie a transcrição (.txt)", None, f"LLM: {'✓' if LLM_AVAILABLE else '✗'}"
         with open(txt_file.name, "r", encoding="utf-8") as f:
             transcript = f.read()
         )
         if not segs:
+            return "❌ Nenhum segmento foi selecionado", None, f"LLM: {'✓' if LLM_AVAILABLE else '✗'}"
         # Edita XML
         tree = ET.parse(xml_file.name)
         # Gera resumo
         total_duration = sum((s.end_f - s.start_f) / FPS for s in segs)
+        resumo = f"✅ {len(segs)} corte(s) criado(s) | Duração total: {total_duration/60:.1f} min | Modo: {mode}\n\n"
         for i, s in enumerate(segs, 1):
             dur = (s.end_f - s.start_f) / FPS
                 resumo += f"   {s.text[:150]}\n"
             resumo += "\n"
+        status = f"✅ Sucesso! | Modo: {mode} | Duração: {total_duration/60:.1f} min | LLM: {'✓' if LLM_AVAILABLE else '✗'}"
+        print(f"\n{status}\n")
         return resumo, out_path, status
     except Exception as e:
         import traceback
         error_detail = traceback.format_exc()
+        print(f"\n❌ ERRO:\n{error_detail}\n")
+        return f"❌ Erro: {str(e)}\n\nDetalhes no console", None, f"LLM: {'✓' if LLM_AVAILABLE else '✗'}"
 # ============ CSS & GRADIO APP ============
 css = """
             use_llm = gr.Checkbox(
                 label="🤖 Usar IA (Gemini)",
                 value=USE_LLM_DEFAULT and LLM_AVAILABLE,
+                info="Requer GEMINI_API_KEY configurada" if not LLM_AVAILABLE else "IA configurada ✓"
             )
             num_segments = gr.Slider(
                 2, 20, 5, step=1,
 - `Crie um corte de 15 minutos com os melhores momentos`
 - `Faça um corte de 5 minutos sobre Maria e José`
 - `Corte de 8 minutos a partir de onde fala sobre protocolo`
+**IMPORTANTE:** Sempre especifique a duração desejada (ex: "10 minutos")
         """)
         natural_instructions = gr.Textbox(
             label="Suas instruções",
     gr.Markdown("""
     ---
     **💡 Dicas:**
+    - Formato da transcrição: `00:00:00:00 - 00