Spaces:

leicam
/

EditorAutomaticoXML

Sleeping

App Files Community

leicam committed on Sep 30, 2025

Commit

03c0164

verified ·

1 Parent(s): f9e0f33

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -28

app.py CHANGED Viewed

@@ -108,6 +108,26 @@ def keyword_score(text: str, custom_keywords: str = "", weight_emotion: float =
     score += 0.0005 * len(text)
     return score
 def llm_rank_segments(candidates: List[Segment], num_segments: int, custom_instructions: str = "") -> List[Segment]:
     """Ask the LLM to pick segments based on criteria."""
     if not LLM_AVAILABLE:
@@ -140,9 +160,39 @@ def llm_rank_segments(candidates: List[Segment], num_segments: int, custom_instr
     return candidates[:num_segments]
 def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
-                   custom_keywords: str, custom_instructions: str,
                    weight_emotion: float, weight_break: float,
                    weight_learn: float, weight_viral: float) -> List[Segment]:
     segs = parse_transcript(transcript_txt)
     if not segs:
         raise ValueError("Nenhum trecho válido encontrado na transcrição.")
@@ -154,7 +204,7 @@ def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
     top = segs[:min(20, len(segs))]
     if use_llm and LLM_AVAILABLE:
-        ranked = llm_rank_segments(top, num_segments, custom_instructions)
         return ranked
     return top[:num_segments]
@@ -266,16 +316,26 @@ def edit_sequence_with_segments(tree: ET.ElementTree, segs: List[Segment]) -> ET
 # ---- Gradio app ----
 def process_xml_and_transcript(premiere_xml_file, transcript_txt_file, use_llm,
-                               num_segments, custom_keywords, custom_instructions,
                                weight_emotion, weight_break, weight_learn, weight_viral):
-    if premiere_xml_file is None or transcript_txt_file is None:
-        return "Envie o XML do Premiere e a transcrição em .txt.", None, f"LLM disponível: {LLM_AVAILABLE}"
-    with open(transcript_txt_file.name, "r", encoding="utf-8") as f:
-        transcript = f.read()
     segs = select_segments(transcript, use_llm and LLM_AVAILABLE, num_segments,
-                          custom_keywords, custom_instructions,
                           weight_emotion, weight_break, weight_learn, weight_viral)
     tree = ET.parse(premiere_xml_file.name)
@@ -285,13 +345,16 @@ def process_xml_and_transcript(premiere_xml_file, transcript_txt_file, use_llm,
     out_path = os.path.join(OUTPUT_DIR, f"{base}_EDITADO.xml")
     tree.write(out_path, encoding="utf-8", xml_declaration=True)
-    resumo = f"✂️ {len(segs)} cortes aplicados (24 fps):\n\n"
     for i, s in enumerate(segs, 1):
         dur_sec = (s.end_f - s.start_f) / FPS
         resumo += f"{i}. {s.start_tc} → {s.end_tc} ({dur_sec:.1f}s)\n"
-        resumo += f"   Score: {s.score:.1f} | {s.text[:150]}\n\n"
-    status = f"✓ LLM disponível: {LLM_AVAILABLE} | LLM usado: {use_llm and LLM_AVAILABLE}"
     return resumo, out_path, status
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
@@ -301,7 +364,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Row():
         with gr.Column():
             xml_in = gr.File(label="📁 XML da sequência (FCP XML)", file_types=[".xml"])
-            txt_in = gr.File(label="📄 Transcrição (.txt) com timecodes", file_types=[".txt"])
         with gr.Column():
             gr.Markdown("### ⚙️ Configurações")
@@ -312,23 +375,29 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             )
             num_segments = gr.Slider(
                 minimum=2, maximum=10, step=1, value=5,
-                label="📊 Número de segmentos a selecionar",
                 info="Quantos trechos incluir no vídeo final"
             )
     with gr.Accordion("🎯 Palavras-chave Personalizadas", open=False):
         custom_keywords = gr.Textbox(
             label="Adicione palavras-chave importantes (separadas por vírgula)",
             placeholder="Exemplo: transformação, resultado, método, estratégia",
-            info="Trechos com essas palavras terão prioridade máxima (peso 3.0)"
-        )
-    with gr.Accordion("📝 Instruções em Texto Livre para o LLM", open=False):
-        custom_instructions = gr.Textbox(
-            label="Instruções adicionais para o LLM",
-            placeholder="Exemplo: Prefira trechos que mostrem resultados concretos e evite introduções longas",
-            lines=3,
-            info="Só funciona se o LLM estiver ativado"
         )
     with gr.Accordion("⚖️ Ajuste Fino dos Pesos de Pontuação", open=False):
@@ -354,16 +423,31 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     run_btn.click(
         process_xml_and_transcript,
         inputs=[xml_in, txt_in, use_llm, num_segments, custom_keywords,
-                custom_instructions, weight_emotion, weight_break, weight_learn, weight_viral],
         outputs=[resumo_out, file_out, status_out]
     )
     gr.Markdown("""
-    ### 💡 Dicas de uso:
-    - **Modo Heurístico**: Desative o LLM e ajuste os pesos para controle total baseado em palavras-chave
-    - **Modo LLM**: Ative o LLM e use as instruções em texto livre para guiar a seleção semanticamente
-    - **Híbrido**: Combine palavras-chave personalizadas + instruções LLM para máximo controle
-    - **Palavras-chave**: Adicione termos específicos do seu nicho que devem ter alta prioridade
     """)
 if __name__ == "__main__":

     score += 0.0005 * len(text)
     return score
+def parse_manual_timecodes(manual_input: str) -> List[tuple]:
+    """Parse manual timecode ranges from user input.
+    Expected format: hh:mm:ss:ff - hh:mm:ss:ff (one per line or comma-separated)
+    Returns list of (start_tc, end_tc) tuples
+    """
+    manual_ranges = []
+    # Replace commas with newlines for flexibility
+    normalized = manual_input.replace(",", "\n")
+    lines = [l.strip() for l in normalized.splitlines() if l.strip()]
+    pat = re.compile(r"(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*[-–—]\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})")
+    for line in lines:
+        m = pat.search(line)
+        if m:
+            start_tc, end_tc = m.groups()
+            manual_ranges.append((start_tc, end_tc))
+    return manual_ranges
 def llm_rank_segments(candidates: List[Segment], num_segments: int, custom_instructions: str = "") -> List[Segment]:
     """Ask the LLM to pick segments based on criteria."""
     if not LLM_AVAILABLE:
     return candidates[:num_segments]
 def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
+                   custom_keywords: str, manual_timecodes: str,
                    weight_emotion: float, weight_break: float,
                    weight_learn: float, weight_viral: float) -> List[Segment]:
+    # Check if user provided manual timecodes
+    manual_ranges = parse_manual_timecodes(manual_timecodes)
+    if manual_ranges:
+        # Manual mode: use only the timecodes provided by user
+        result_segs = []
+        for start_tc, end_tc in manual_ranges:
+            try:
+                start_f = parse_timecode_to_frames(start_tc)
+                end_f = parse_timecode_to_frames(end_tc)
+                if end_f > start_f:
+                    result_segs.append(Segment(
+                        start_tc=start_tc,
+                        end_tc=end_tc,
+                        start_f=start_f,
+                        end_f=end_f,
+                        text=f"Corte manual {start_tc} - {end_tc}",
+                        score=100.0
+                    ))
+            except Exception as e:
+                print(f"Erro ao processar timecode manual {start_tc}-{end_tc}: {e}")
+                continue
+        if not result_segs:
+            raise ValueError("Nenhum timecode manual válido encontrado.")
+        return result_segs
+    # Automatic mode: use transcript + scoring
     segs = parse_transcript(transcript_txt)
     if not segs:
         raise ValueError("Nenhum trecho válido encontrado na transcrição.")
     top = segs[:min(20, len(segs))]
     if use_llm and LLM_AVAILABLE:
+        ranked = llm_rank_segments(top, num_segments, "")
         return ranked
     return top[:num_segments]
 # ---- Gradio app ----
 def process_xml_and_transcript(premiere_xml_file, transcript_txt_file, use_llm,
+                               num_segments, custom_keywords, manual_timecodes,
                                weight_emotion, weight_break, weight_learn, weight_viral):
+    if premiere_xml_file is None:
+        return "Envie o XML do Premiere.", None, f"LLM disponível: {LLM_AVAILABLE}"
+    # Check if manual timecodes were provided
+    manual_ranges = parse_manual_timecodes(manual_timecodes)
+    if manual_ranges:
+        # Manual mode: don't need transcript
+        transcript = ""
+    else:
+        # Automatic mode: need transcript
+        if transcript_txt_file is None:
+            return "Envie a transcrição em .txt ou forneça minutagens manuais.", None, f"LLM disponível: {LLM_AVAILABLE}"
+        with open(transcript_txt_file.name, "r", encoding="utf-8") as f:
+            transcript = f.read()
     segs = select_segments(transcript, use_llm and LLM_AVAILABLE, num_segments,
+                          custom_keywords, manual_timecodes,
                           weight_emotion, weight_break, weight_learn, weight_viral)
     tree = ET.parse(premiere_xml_file.name)
     out_path = os.path.join(OUTPUT_DIR, f"{base}_EDITADO.xml")
     tree.write(out_path, encoding="utf-8", xml_declaration=True)
+    mode = "MANUAL" if manual_ranges else "AUTOMÁTICO"
+    resumo = f"✂️ {len(segs)} cortes aplicados - Modo: {mode} (24 fps):\n\n"
     for i, s in enumerate(segs, 1):
         dur_sec = (s.end_f - s.start_f) / FPS
         resumo += f"{i}. {s.start_tc} → {s.end_tc} ({dur_sec:.1f}s)\n"
+        if not manual_ranges:
+            resumo += f"   Score: {s.score:.1f} | {s.text[:150]}\n"
+        resumo += "\n"
+    status = f"✓ Modo: {mode} | LLM disponível: {LLM_AVAILABLE} | LLM usado: {use_llm and LLM_AVAILABLE and not manual_ranges}"
     return resumo, out_path, status
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Row():
         with gr.Column():
             xml_in = gr.File(label="📁 XML da sequência (FCP XML)", file_types=[".xml"])
+            txt_in = gr.File(label="📄 Transcrição (.txt) - Opcional se usar minutagens manuais", file_types=[".txt"])
         with gr.Column():
             gr.Markdown("### ⚙️ Configurações")
             )
             num_segments = gr.Slider(
                 minimum=2, maximum=10, step=1, value=5,
+                label="📊 Número de segmentos (só no modo automático)",
                 info="Quantos trechos incluir no vídeo final"
             )
+    with gr.Accordion("✂️ MINUTAGENS MANUAIS (Sobrescreve tudo)", open=True):
+        manual_timecodes = gr.Textbox(
+            label="Cole aqui os timecodes exatos que você quer cortar",
+            placeholder="Exemplo:\n00:01:23:15 - 00:02:45:10\n00:05:30:00 - 00:07:15:22\n00:10:00:05 - 00:12:30:18",
+            lines=5,
+            info="⚠️ Se preencher este campo, o app ignora a transcrição e todos os outros parâmetros, cortando EXATAMENTE o que você especificou"
+        )
+        gr.Markdown("""
+        **Formatos aceitos:**
+        - `hh:mm:ss:ff - hh:mm:ss:ff` (um por linha)
+        - Pode separar por vírgula também
+        - Exemplo: `00:01:30:00 - 00:02:00:15, 00:05:10:00 - 00:06:20:10`
+        """)
     with gr.Accordion("🎯 Palavras-chave Personalizadas", open=False):
         custom_keywords = gr.Textbox(
             label="Adicione palavras-chave importantes (separadas por vírgula)",
             placeholder="Exemplo: transformação, resultado, método, estratégia",
+            info="Trechos com essas palavras terão prioridade máxima (peso 3.0) - Só funciona no modo automático"
         )
     with gr.Accordion("⚖️ Ajuste Fino dos Pesos de Pontuação", open=False):
     run_btn.click(
         process_xml_and_transcript,
         inputs=[xml_in, txt_in, use_llm, num_segments, custom_keywords,
+                manual_timecodes, weight_emotion, weight_break, weight_learn, weight_viral],
         outputs=[resumo_out, file_out, status_out]
     )
     gr.Markdown("""
+    ### 💡 Modos de uso:
+    **🎯 MODO MANUAL (Recomendado para controle total)**
+    - Preencha o campo "Minutagens Manuais" com seus timecodes exatos
+    - A transcrição se torna opcional
+    - Todos os outros parâmetros são ignorados
+    - O corte será feito EXATAMENTE como você especificou
+    **🤖 MODO AUTOMÁTICO**
+    - Deixe as minutagens manuais vazias
+    - Envie a transcrição com timecodes
+    - Configure LLM, palavras-chave e pesos conforme desejado
+    - O app escolhe os melhores trechos automaticamente
+    **Exemplos de minutagens manuais:**
+    ```
+    00:01:23:15 - 00:02:45:10
+    00:05:30:00 - 00:07:15:22
+    00:10:00:05 - 00:12:30:18
+    ```
     """)
 if __name__ == "__main__":