import os import re import json import xml.etree.ElementTree as ET from dataclasses import dataclass from typing import List, Tuple, Optional, Callable import gradio as gr # ========================= # Configurações Gerais # ========================= OUTPUT_DIR = "./Output" os.makedirs(OUTPUT_DIR, exist_ok=True) # ========================= # LLM (Gemini) # ========================= USE_LLM_DEFAULT = True GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip() LLM_AVAILABLE = False LLM_MODEL_NAME = "gemini-2.0-flash-exp" try: if GEMINI_API_KEY: import google.generativeai as genai genai.configure(api_key=GEMINI_API_KEY) LLM = genai.GenerativeModel(LLM_MODEL_NAME) LLM_AVAILABLE = True else: LLM = None except Exception: LLM = None LLM_AVAILABLE = False # ========================= # Modelos # ========================= @dataclass class Segment: start_tc: str end_tc: str start_f: int end_f: int text: str score: float # ========================= # Funções de Timecode # ========================= def _tc_to_hmsf(tc: str, fps: int) -> Tuple[int, int, int, int]: """Converte timecode para (hh, mm, ss, ff).""" s = tc.strip() m = re.match(r'^(\d{1,2}):(\d{2}):(\d{2})[:;](\d{2})$', s) if m: hh, mm, ss, ff = map(int, m.groups()) return hh, mm, ss, ff m = re.match(r'^(\d{1,2}):(\d{2}):(\d{2})[.,](\d{1,3})$', s) if m: hh, mm, ss, ms = map(int, m.groups()) ff = int(round((ms / 1000.0) * fps)) if ff >= fps: ss += 1 ff = 0 return hh, mm, ss, ff m = re.match(r'^(\d{1,2}):(\d{2}):(\d{2})$', s) if m: hh, mm, ss = map(int, m.groups()) return hh, mm, ss, 0 raise ValueError(f"Timecode inválido: {tc}") def parse_timecode_to_frames(tc: str, fps: int) -> int: hh, mm, ss, ff = _tc_to_hmsf(tc, fps) return hh * 3600 * fps + mm * 60 * fps + ss * fps + ff def frames_to_timecode(frames: int, fps: int) -> str: hh = frames // (3600 * fps) rem = frames % (3600 * fps) mm = rem // (60 * fps) rem = rem % (60 * fps) ss = rem // fps ff = rem % fps return f"{hh:02d}:{mm:02d}:{ss:02d}:{ff:02d}" # 
# =========================
# Transcript parser
# =========================
def _build_segment(
    start_tc: str, end_tc: str, text: str, fps: int, score: float = 0.0
) -> Optional[Segment]:
    """Build a Segment from two raw timecodes, or return None when unusable.

    A range is unusable when either timecode cannot be parsed or when the end
    is not strictly after the start. The stored timecodes are re-rendered from
    the frame counts, so every Segment uses the ``HH:MM:SS:FF`` form.
    """
    try:
        start_f = parse_timecode_to_frames(start_tc, fps)
        end_f = parse_timecode_to_frames(end_tc, fps)
        if end_f <= start_f:
            return None
        return Segment(
            start_tc=frames_to_timecode(start_f, fps),
            end_tc=frames_to_timecode(end_f, fps),
            start_f=start_f,
            end_f=end_f,
            text=text,
            score=score,
        )
    except (ValueError, ZeroDivisionError):
        # Best effort: a malformed entry is skipped, not fatal.
        return None


def parse_transcript(txt: str, fps: int) -> List[Segment]:
    """Parse a transcript into Segments.

    Two layouts are recognised, and may be mixed in one file:

    * range lines such as ``[00:00:01:00 - 00:00:05:00] text`` (brackets
      optional; the text may instead follow on the next lines);
    * WEBVTT/SRT cues such as ``00:00:01,000 --> 00:00:03,000`` with an
      optional numeric index line before them and the text after them.

    Args:
        txt: Raw transcript text.
        fps: Frames per second used to convert timecodes to frames.

    Returns:
        Segments in file order; entries with invalid or empty ranges are
        skipped. Empty input yields an empty list.
    """
    if not txt or not txt.strip():
        return []

    lines = [line.rstrip() for line in txt.splitlines()]
    total = len(lines)
    results: List[Segment] = []

    line_range = re.compile(
        r'^\s*\[?\s*(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*[-—–]\s*'
        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*\]?\s*(.*)$'
    )
    arrow = re.compile(
        r'(\d{1,2}:\d{2}:\d{2}(?:[.,]\d{1,3}|[:;]\d{2})?)\s*-->\s*'
        r'(\d{1,2}:\d{2}:\d{2}(?:[.,]\d{1,3}|[:;]\d{2})?)'
    )
    index_only = re.compile(r'^\d+\s*$')

    i = 0
    while i < total:
        raw = lines[i].strip()
        # "desconhecido" is skipped like a blank line when it stands alone.
        if not raw or raw.lower() == "desconhecido":
            i += 1
            continue

        # --- Range line: "<start> - <end> [text]" ---
        m = line_range.match(raw)
        if m:
            start_tc, end_tc, trailing_text = m.groups()
            text_parts = []
            if trailing_text.strip():
                text_parts.append(trailing_text.strip())
            else:
                # Text is on the following lines, up to the next blank line,
                # range line, numeric index line or VTT/SRT timing line.
                j = i + 1
                while j < total:
                    nxt = lines[j].strip()
                    if (
                        not nxt
                        or line_range.match(nxt)
                        or index_only.match(nxt)
                        or arrow.search(nxt)
                    ):
                        break
                    text_parts.append(nxt)
                    j += 1
                i = j - 1
            text = " ".join(text_parts).strip()
            seg = _build_segment(
                start_tc, end_tc, text if text else f"{start_tc} - {end_tc}", fps
            )
            if seg is not None:
                results.append(seg)
            i += 1
            continue

        # --- VTT/SRT cue: "00:00:01,000 --> 00:00:03,000" ---
        cue = arrow.search(raw)
        if cue:
            j = i + 1
        elif i + 1 < total:
            # The current line may be the cue's index/identifier line.
            cue = arrow.search(lines[i + 1])
            j = i + 2
        if cue:
            start_tc, end_tc = cue.groups()
            text_parts = []
            while j < total:
                nxt = lines[j].strip()
                if not nxt:
                    break
                if index_only.match(nxt) and (
                    j + 1 < total and arrow.search(lines[j + 1])
                ):
                    break
                if arrow.search(nxt):
                    break
                text_parts.append(nxt)
                j += 1
            seg = _build_segment(start_tc, end_tc, " ".join(text_parts).strip(), fps)
            if seg is not None:
                results.append(seg)
            # Resume AT the line that ended the cue. Skipping past it would
            # drop the next cue whenever cues are not separated by a blank
            # line, because that line is the next cue's timing line.
            i = j
            continue

        i += 1

    return results


# =========================
# Manual timecodes
# =========================
def parse_manual_timecodes(manual_input: str) -> List[Tuple[str, str]]:
    """Extract ``(start, end)`` timecode pairs typed by the user.

    Ranges look like ``00:21:18:09 - 00:31:18:09`` and may be separated by
    newlines, commas or spaces. Timecodes are returned as typed (no
    validation or conversion happens here).

    Returns:
        The ranges in input order; an empty list for empty input.
    """
    if not manual_input or not manual_input.strip():
        return []

    # The input is NOT split on commas: a comma is also the millisecond
    # separator ("00:00:01,500"). Every range in a line is matched instead,
    # and the lookahead stops the end timecode from swallowing the start of a
    # comma-separated following range ("...:18,00:45:20 - ...").
    pattern = re.compile(
        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*[-–—]\s*'
        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3}(?![\d:]))?)'
    )
    return [
        (m.group(1), m.group(2))
        for line in manual_input.splitlines()
        for m in pattern.finditer(line)
    ]


# =========================
# Helpers for the LLM's JSON
# =========================
def _extract_json_block(text: str) -> str:
    """Extract the most plausible JSON object from an LLM reply.

    1) A fenced block (```json ... ``` or plain ``` ... ```) wins.
    2) Otherwise the text from the first '{' to its matching '}' is used.
    3) Otherwise the text from the first '{' to the last '}'.

    Raises:
        ValueError: If the reply is empty or no object can be delimited.
    """
    if not text:
        raise ValueError("Resposta vazia do LLM")

    m = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text, flags=re.IGNORECASE)
    if m:
        return m.group(1).strip()

    start = text.find("{")
    if start == -1:
        raise ValueError("Nenhum '{' encontrado na resposta do LLM")

    depth = 0
    in_string = False
    escaped = False
    for i in range(start, len(text)):
        c = text[i]
        if in_string:
            # Braces inside a double-quoted string are not structural.
            if escaped:
                escaped = False
            elif c == "\\":
                escaped = True
            elif c == '"':
                in_string = False
            continue
        if c == '"':
            in_string = True
        elif c == "{":
            depth += 1
        elif c == "}":
            depth -= 1
            if depth == 0:
                return text[start:i + 1].strip()

    # Unbalanced braces or quotes: fall back to the widest candidate span.
    end = text.rfind("}")
    if end != -1 and end > start:
        return text[start:end + 1].strip()

    raise ValueError("Não foi possível delimitar um JSON na resposta do LLM")


def _coerce_to_strict_json(s: str) -> str:
    """Try to repair "almost JSON" text and return the candidate string.

    Steps: strip a leading ``json`` tag and backticks, straighten curly
    quotes, remove ``//`` and ``/* */`` comments, remove trailing commas,
    quote bare keys and, if the result still does not parse, turn single
    quotes into double quotes.

    NOTE: these are textual heuristics and are not string-aware (a ``//``
    inside a value is treated as a comment). The result is not guaranteed to
    be valid JSON; the caller must still parse it.
    """
    t = s.strip()
    t = re.sub(r"^json\s*", "", t, flags=re.IGNORECASE).strip().strip("`")
    t = (t.replace("\u201c", '"')
          .replace("\u201d", '"')
          .replace("\u2018", "'")
          .replace("\u2019", "'"))
    t = re.sub(r"//.*?$", "", t, flags=re.MULTILINE)
    t = re.sub(r"/\*[\s\S]*?\*/", "", t)
    t = re.sub(r",(\s*[}\]])", r"\1", t)
    t = re.sub(r'([{,]\s*)([A-Za-z_][A-Za-z0-9_]*)\s*:', r'\1"\2":', t)
    try:
        json.loads(t)
        return t
    except ValueError:
        # Last resort: single-quoted keys/strings.
        return t.replace("'", '"')


# =========================
# AI: smart analysis with Gemini
# =========================
def ai_analyze_and_select(
    segments: List[Segment],
    command: str,
    fps: int,
    progress_callback: Optional[Callable[[str], None]] = None
) -> List[Segment]:
    """Ask Gemini to pick the best cuts for ``command``.

    The transcript is sent as JSON (index, start timecode, duration and the
    first 200 characters of each segment). The model must answer with
    ``{"cuts": [{"start_index", "duration_seconds", "reason"}]}``. Each cut
    starts at the start of the indexed segment and lasts the requested number
    of seconds.

    Args:
        segments: Parsed transcript segments.
        command: The user's natural-language instruction.
        fps: Frames per second of the sequence.
        progress_callback: Optional sink for progress messages.

    Returns:
        One Segment per usable cut (score 100.0). Cuts whose fields are
        malformed or whose index is out of range are skipped.

    Raises:
        ValueError: If the LLM is unavailable, ``segments`` is empty, the
            reply is not parseable JSON, or it has no non-empty ``cuts`` list.
    """
    if not LLM_AVAILABLE or not segments:
        raise ValueError("IA não disponível ou sem segmentos para analisar")

    if progress_callback:
        progress_callback("Etapa 1/3: preparando dados para análise...")

    # Full transcript, indexed so the model can refer to segments by number.
    transcript_data = [
        {
            "index": i,
            "timecode": seg.start_tc,
            "duration_sec": round(max(0, (seg.end_f - seg.start_f) / fps), 1),
            "text": (seg.text or "")[:200],
        }
        for i, seg in enumerate(segments)
    ]
    transcript_json = json.dumps(transcript_data, ensure_ascii=False, indent=2)

    if progress_callback:
        progress_callback(f"Etapa 2/3: analisando {len(segments)} segmentos com IA...")

    # The JSON template carries explicit placeholders; a template with empty
    # values is itself invalid JSON and gives the model no type information.
    prompt = f"""Você é um especialista em edição de vídeo. Analise a transcrição e identifique os MELHORES trechos baseado no comando do usuário.

COMANDO DO USUÁRIO: {command}

TRANSCRIÇÃO COMPLETA (formato JSON com index, timecode, duração e texto):
{transcript_json}

INSTRUÇÕES:
1. Leia o comando com atenção e identifique:
   - Quantidade de cortes desejada
   - Duração de cada corte (em segundos)
   - Tema/assunto/palavras-chave mencionados
   - Timecode de início (se mencionado)
2. Analise TODA a transcrição e identifique os segmentos que melhor correspondem ao comando
3. Responda APENAS com JSON estrito:
{{
  "cuts": [
    {{
      "start_index": <índice inteiro do segmento inicial>,
      "duration_seconds": <duração do corte em segundos, inteiro>,
      "reason": "<justificativa curta>"
    }}
  ]
}}"""

    # Prefer a JSON mime type; if the SDK/model rejects it, retry without it.
    try:
        response = LLM.generate_content(
            prompt,
            generation_config={
                "temperature": 0.2,
                "max_output_tokens": 2000,
                "response_mime_type": "application/json"
            }
        )
    except Exception:
        response = LLM.generate_content(
            prompt,
            generation_config={
                "temperature": 0.2,
                "max_output_tokens": 2000
            }
        )

    # Reading `.text` can raise ValueError when the reply carries no text part
    # (for example a blocked response); treat that as an empty reply so it is
    # reported through the same error path as any other unusable answer.
    try:
        response_text = (response.text or "").strip()
    except (AttributeError, ValueError):
        response_text = ""

    if progress_callback:
        progress_callback("Etapa 3/3: processando resposta da IA...")

    # Extract and parse the JSON, repairing it once if needed.
    try:
        raw_json = _extract_json_block(response_text)
        try:
            result = json.loads(raw_json)
        except json.JSONDecodeError:
            result = json.loads(_coerce_to_strict_json(raw_json))
    except ValueError as e:
        snippet = response_text[:600].replace("\n", " ")
        raise ValueError(
            f"Erro ao processar resposta da IA (JSON inválido): {e}. Amostra: {snippet}"
        ) from e

    cuts_data = result.get("cuts", []) if isinstance(result, dict) else []
    if not isinstance(cuts_data, list) or not cuts_data:
        raise ValueError("IA não retornou a lista 'cuts' com itens válidos")

    selected_segments: List[Segment] = []
    for cut_info in cuts_data:
        if not isinstance(cut_info, dict):
            continue
        try:
            start_idx = int(cut_info.get("start_index", 0))
            # Accept "30", 30 and 30.5 alike; fractions are truncated.
            duration_sec = int(float(cut_info.get("duration_seconds", 60)))
            reason = str(cut_info.get("reason", "")).strip()
        except (TypeError, ValueError, OverflowError):
            continue

        if start_idx < 0 or start_idx >= len(segments):
            continue

        start_frame = segments[start_idx].start_f
        end_frame = start_frame + max(0, int(duration_sec * fps))

        # Gather the text of every segment that starts inside the cut.
        text_parts = [f"[IA] {reason}"] if reason else []
        for seg in segments[start_idx:]:
            if seg.start_f >= end_frame:
                break
            if seg.text:
                text_parts.append(seg.text[:150])

        selected_segments.append(Segment(
            start_tc=frames_to_timecode(start_frame, fps),
            end_tc=frames_to_timecode(end_frame, fps),
            start_f=start_frame,
            end_f=end_frame,
            text=" [...] ".join(text_parts)[:500],
            score=100.0
        ))

    return selected_segments


# =========================
# Command processing without AI
# =========================
def manual_command_processing(segments: List[Segment], command: str, fps: int) -> List[Segment]:
    """Fallback: interpret a simple command with regexes, without AI.

    Reads from the command the number of cuts ("3 cortes"), the length of
    each cut ("30 segundos" / "2 minutos") and an optional start ("começando
    em 00:02:00:00" / "a partir de 00:02:00"). Defaults are 1 cut, 60 seconds,
    start at frame 0. Cuts are laid out back to back from the start; the
    topic words of the command are not used.

    Returns:
        One Segment per cut (score 50.0), with text collected from the
        transcript segments that start inside it.
    """
    s = (command or "").lower()

    # Number of cuts.
    count = 1
    m = re.search(r'(\d+)\s*(?:cortes?|clipes?|segmentos?)', s)
    if m:
        count = int(m.group(1))

    # Length of each cut: seconds take precedence over minutes.
    duration_sec = 60
    m = re.search(r'(\d+)\s*(?:segundos?|s\b)', s)
    if m:
        duration_sec = int(m.group(1))
    else:
        m = re.search(r'(\d+)\s*(?:minutos?|min\b)', s)
        if m:
            duration_sec = int(m.group(1)) * 60

    # Start timecode. A preposition may sit between the verb and the
    # timecode ("começando em 00:02:00:00"), as in the UI's own example.
    start_frame = 0
    m = re.search(
        r'(?:começando|a partir de)(?:\s+(?:em|no|aos?))?\s+'
        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)',
        s
    )
    if m:
        try:
            start_frame = parse_timecode_to_frames(m.group(1), fps)
        except ValueError:
            pass  # keep the default start when the timecode is malformed

    # Contiguous cuts.
    duration_frames = duration_sec * fps
    results = []
    base_frame = start_frame
    for i in range(count):
        end_frame = base_frame + duration_frames
        text_parts = [
            seg.text[:100]
            for seg in segments
            if base_frame <= seg.start_f < end_frame and seg.text
        ]
        combined_text = " [...] ".join(text_parts[:10])[:400]
        results.append(Segment(
            start_tc=frames_to_timecode(base_frame, fps),
            end_tc=frames_to_timecode(end_frame, fps),
            start_f=base_frame,
            end_f=end_frame,
            text=combined_text if combined_text else f"Corte {i+1}",
            score=50.0
        ))
        base_frame = end_frame

    return results


# =========================
# Automatic mode
# =========================
def auto_score_segments(
    segs: List[Segment],
    num_segments: int,
    custom_keywords: str,
    weight_emotion: float,
    weight_break: float,
    weight_learn: float,
    weight_viral: float
) -> List[Segment]:
    """Score segments by keyword presence and return the best ones.

    Each built-in word found in a segment's text adds its category weight;
    each custom keyword (comma-separated) found adds 5.0. Matching is a
    case-insensitive substring test.

    The ``score`` field of every segment is updated; the order of the
    caller's list is left untouched.

    Returns:
        The ``max(1, num_segments)`` highest-scoring segments, best first.
    """
    weighted_words = (
        (weight_emotion, ['medo', 'coragem', 'amor', 'ódio', 'paixão', 'alegria', 'tristeza']),
        (weight_break, ['nunca', 'de repente', 'surpreendente', 'inesperado', 'incrível']),
        (weight_learn, ['aprendi', 'descobri', 'entendi', 'percebi', 'lição']),
        (weight_viral, ['segredo', 'verdade', 'revelação', 'exclusivo', 'confissão']),
    )
    # Normalise the custom keywords once, not once per segment.
    custom = [
        kw.strip().lower()
        for kw in (custom_keywords or "").split(",")
        if kw.strip()
    ]

    for seg in segs:
        text = (seg.text or "").lower()
        score = 0.0
        for weight, words in weighted_words:
            for word in words:
                if word in text:
                    score += weight
        for kw in custom:
            if kw in text:
                score += 5.0
        seg.score = score

    ranked = sorted(segs, key=lambda x: x.score, reverse=True)
    return ranked[:max(1, num_segments)]


# =========================
# XML editing
# =========================
def deep_copy_element(elem: ET.Element) -> ET.Element:
    """Return a recursive copy of ``elem`` (tag, attributes, text, tail, children)."""
    new = ET.Element(elem.tag, attrib=dict(elem.attrib))
    new.text = elem.text
    new.tail = elem.tail
    for child in elem:
        new.append(deep_copy_element(child))
    return new


def _make_clipitem(
    clip_id: str,
    name: str,
    timeline_pos: int,
    seg: Segment,
    template: Optional[ET.Element]
) -> ET.Element:
    """Build one <clipitem> placing ``seg`` at ``timeline_pos`` on the timeline.

    ``rate`` and ``file`` are copied from ``template`` when it has them.
    """
    clip = ET.Element("clipitem", {"id": clip_id})
    ET.SubElement(clip, "name").text = name
    ET.SubElement(clip, "start").text = str(timeline_pos)
    ET.SubElement(clip, "end").text = str(timeline_pos + (seg.end_f - seg.start_f))
    ET.SubElement(clip, "in").text = str(seg.start_f)
    ET.SubElement(clip, "out").text = str(seg.end_f)
    if template is not None:
        for tag in ("rate", "file"):
            node = template.find(tag)
            if node is not None:
                clip.append(deep_copy_element(node))
    return clip


def edit_xml(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
    """Replace the clips of the first video and audio track with ``segs``.

    The first <clipitem> of each track is kept as a template (its ``rate``
    and ``file`` children are copied into every new clip), all existing
    clipitems of those two tracks are removed, and one clip per segment is
    appended back to back starting at timeline position 0. Segments with a
    non-positive duration are skipped.

    Args:
        tree: Parsed XML containing a <sequence>; modified in place.
        segs: Segments to place, in timeline order.

    Returns:
        The same ``tree`` object.

    Raises:
        ValueError: If the sequence or either track element is missing.
    """
    root = tree.getroot()
    seq = root.find(".//sequence")
    if seq is None:
        raise ValueError("Sequence não encontrada no XML")

    v_track = seq.find(".//media/video/track")
    a_track = seq.find(".//media/audio/track")
    # Compare with None explicitly: an Element with no children is falsy, so
    # a truthiness test would reject a valid but empty track.
    if v_track is None or a_track is None:
        raise ValueError("Trilhas de vídeo/áudio não encontradas")

    v_template = v_track.find("./clipitem")
    a_template = a_track.find("./clipitem")

    for track in (v_track, a_track):
        for clip in track.findall("./clipitem"):
            track.remove(clip)

    timeline_pos = 0
    for i, seg in enumerate(segs, 1):
        duration = seg.end_f - seg.start_f
        if duration <= 0:
            continue
        v_track.append(
            _make_clipitem(f"clip-v{i}", f"Clip {i}", timeline_pos, seg, v_template)
        )
        a_track.append(
            _make_clipitem(f"clip-a{i}", f"Clip {i}", timeline_pos, seg, a_template)
        )
        timeline_pos += duration

    return tree


# =========================
# Selection (orchestration)
# =========================
def select_segments(
    transcript_txt: str,
    use_llm: bool,
    num_segments: int,
    custom_keywords: str,
    manual_timecodes: str,
    natural_instructions: str,
    weight_emotion: float,
    weight_break: float,
    weight_learn: float,
    weight_viral: float,
    fps: int,
    progress_callback: Optional[Callable[[str], None]] = None
) -> List[Segment]:
    """Pick the segments to cut, using the first mode that applies.

    1. Manual timecodes, when any range is given (nothing else is consulted).
    2. Natural-language command: Gemini when enabled and available, else the
       regex fallback. Both need a transcript.
    3. Automatic keyword scoring of the transcript.

    Raises:
        ValueError: If a command is given without a transcript, if there is
            nothing to select from, or if the AI step fails.
    """
    # 1) Manual
    manual = parse_manual_timecodes(manual_timecodes)
    if manual:
        result = []
        for start_tc, end_tc in manual:
            try:
                start_f = parse_timecode_to_frames(start_tc, fps)
                end_f = parse_timecode_to_frames(end_tc, fps)
                # Ranges are not checked for end > start here; the caller
                # validates durations and reports them.
                result.append(Segment(
                    start_tc=frames_to_timecode(start_f, fps),
                    end_tc=frames_to_timecode(end_f, fps),
                    start_f=start_f,
                    end_f=end_f,
                    text=f"Manual: {start_tc} - {end_tc}",
                    score=100.0
                ))
            except (ValueError, ZeroDivisionError):
                continue  # skip a malformed range, keep the others
        return result

    # 2) Transcript
    segs = parse_transcript(transcript_txt, fps) if transcript_txt else []

    # 3) Natural language
    if (natural_instructions or "").strip():
        if not segs:
            raise ValueError(
                "Para usar comandos em linguagem natural, forneça uma transcrição "
                "ou use minutagens manuais."
            )
        if use_llm and LLM_AVAILABLE:
            return ai_analyze_and_select(segs, natural_instructions, fps, progress_callback)
        return manual_command_processing(segs, natural_instructions, fps)

    # 4) Automatic
    if not segs:
        raise ValueError(
            "Nenhum segmento encontrado. Envie transcrição, minutagens ou um comando em linguagem natural."
        )

    return auto_score_segments(
        segs, num_segments, custom_keywords,
        weight_emotion, weight_break, weight_learn, weight_viral
    )


# =========================
# Main pipeline
# =========================
def _uploaded_path(file_obj) -> str:
    """Return the filesystem path of an upload.

    Works both for objects exposing ``.name`` and for plain path strings.
    """
    return str(getattr(file_obj, "name", file_obj))


def _llm_status() -> str:
    """Short LLM availability tag shown in the status box."""
    return f"LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"


def process_files(
    xml_file, txt_file, use_llm, num_segments, custom_keywords,
    manual_timecodes, natural_instructions,
    weight_emotion, weight_break, weight_learn, weight_viral,
    fps,
    progress=gr.Progress()
):
    """Gradio handler: select segments, rewrite the XML and summarise.

    The edited XML is written to ``OUTPUT_DIR`` as ``<name>_EDITADO.xml``.
    Segments shorter than one second are discarded before editing.

    Returns:
        ``(summary_text, output_path_or_None, status_text)``. Failures are
        reported in the summary text instead of being raised, so the UI
        always gets a displayable result.
    """
    if not xml_file:
        return "Envie o XML do Premiere", None, _llm_status()

    try:
        debug_info = []
        instructions = (natural_instructions or "").strip()

        def progress_callback(msg):
            progress(0.5, desc=msg)
            debug_info.append(msg)

        progress(0.1, desc="Carregando arquivos...")

        # The transcript is only needed when no manual ranges were given.
        transcript = ""
        manual = parse_manual_timecodes(manual_timecodes)
        if not manual and txt_file:
            # utf-8-sig transparently drops a BOM if the file has one.
            with open(_uploaded_path(txt_file), "r", encoding="utf-8-sig") as f:
                transcript = f.read()
            debug_info.append(f"Transcrição: {len(transcript)} caracteres")

        progress(0.2, desc="Selecionando segmentos...")

        segments = select_segments(
            transcript,
            bool(use_llm) and LLM_AVAILABLE,
            int(num_segments),
            custom_keywords,
            manual_timecodes,
            natural_instructions,
            float(weight_emotion),
            float(weight_break),
            float(weight_learn),
            float(weight_viral),
            int(fps),
            progress_callback
        )

        if not segments:
            return "Nenhum segmento selecionado", None, _llm_status()

        # Minimum duration: at least one second.
        min_frames = max(1, int(fps))
        valid_segments = [
            seg for seg in segments if (seg.end_f - seg.start_f) >= min_frames
        ]
        if not valid_segments:
            return "Segmentos inválidos (duração muito curta)", None, _llm_status()

        segments = valid_segments
        debug_info.append(f"{len(segments)} segmento(s) válidos")

        progress(0.7, desc="Editando XML...")

        xml_path = _uploaded_path(xml_file)
        tree = ET.parse(xml_path)
        tree = edit_xml(tree, segments)

        basename = os.path.splitext(os.path.basename(xml_path))[0]
        output_path = os.path.join(OUTPUT_DIR, f"{basename}_EDITADO.xml")
        tree.write(output_path, encoding="utf-8", xml_declaration=True)

        progress(0.9, desc="Gerando resumo...")

        total_sec = sum((s.end_f - s.start_f) / fps for s in segments)
        total_min = total_sec / 60.0

        if manual:
            mode = "Manual"
        elif instructions and use_llm and LLM_AVAILABLE:
            mode = "IA Completa (Gemini)"
        elif instructions:
            mode = "Básico (sem IA)"
        else:
            mode = "Automático"

        summary_lines = [
            "RESULTADO",
            f"- Cortes: {len(segments)}",
            f"- Duração total: {total_min:.1f} min",
            f"- Modo: {mode}",
            ""
        ]

        for i, seg in enumerate(segments, 1):
            dur_sec = (seg.end_f - seg.start_f) / fps
            dur_min = dur_sec / 60.0
            line = f"Corte {i}\n {seg.start_tc} -> {seg.end_tc} ({dur_min:.2f} min / {dur_sec:.0f}s)"
            if seg.text and len(seg.text.strip()) > 10:
                text_preview = seg.text[:200].strip()
                if len(seg.text) > 200:
                    text_preview += "..."
                line += f"\n {text_preview}"
            summary_lines.append(line)
            summary_lines.append("")

        if debug_info:
            summary_lines.append("Log do processamento:")
            summary_lines.extend(f"- {info}" for info in debug_info)

        summary = "\n".join(summary_lines)
        status = f"Sucesso | {mode} | {total_min:.1f} min | {_llm_status()}"

        progress(1.0, desc="Concluído")
        return summary, output_path, status

    except Exception as e:
        # Top-level UI boundary: show the failure instead of crashing the app.
        import traceback
        error_trace = traceback.format_exc()
        print(error_trace)
        error_msg = f"Erro: {str(e)}\n\nDetalhes:\n{error_trace[:800]}"
        return error_msg, None, _llm_status()


# =========================
# Gradio interface
# =========================
with gr.Blocks(theme=gr.themes.Soft(), title="Editor XML Premiere - IA") as demo:
    gr.Markdown("# Editor XML Premiere - IA Completa (Gemini)")

    status_inicial = f"{'IA Gemini ativa' if LLM_AVAILABLE else 'IA desabilitada: configure GEMINI_API_KEY'}"
    gr.Markdown(f"Status: {status_inicial}")

    with gr.Row():
        xml_in = gr.File(label="XML do Premiere", file_types=[".xml"])
        txt_in = gr.File(label="Transcrição (.txt) — obrigatória para IA", file_types=[".txt"])

    with gr.Row():
        use_llm = gr.Checkbox(
            label="Usar IA Gemini (análise completa — recomendado)",
            value=USE_LLM_DEFAULT and LLM_AVAILABLE,
            interactive=LLM_AVAILABLE
        )
        # Slider arguments are passed by keyword: `step` is keyword-only in
        # the Slider constructor, so a fourth positional argument fails.
        num_segments = gr.Slider(
            minimum=2, maximum=20, value=5, step=1,
            label="Quantidade de segmentos (modo automático)"
        )
        fps_in = gr.Slider(minimum=12, maximum=60, value=24, step=1, label="FPS")

    with gr.Accordion("Comando em linguagem natural (modo principal)", open=True):
        gr.Markdown(
            "Exemplos: \n"
            '- "Crie 3 cortes de 30 segundos sobre disciplina"\n'
            '- "2 clipes de 1 minuto falando sobre Maria"\n'
            '- "Corte de 5 minutos começando em 00:02:00:00 sobre tecnologia"'
        )
        natural_instructions = gr.Textbox(
            label="Digite seu comando",
            placeholder='Ex: "Crie 3 cortes de 45 segundos sobre os momentos de disciplina e superação"',
            lines=4
        )

    with gr.Accordion("Minutagens manuais (precisão total)", open=False):
        gr.Markdown("Ignora IA e outros modos.")
        manual_timecodes = gr.Textbox(
            label="Timecodes (um por linha)",
            placeholder="00:21:18:09 - 00:31:18:09\n00:45:20:15 - 00:50:10:22",
            lines=4
        )

    with gr.Accordion("Modo automático (sem comando)", open=False):
        gr.Markdown("Sistema de pontuação simples por palavras-chave.")
        custom_keywords = gr.Textbox(
            label="Palavras-chave (separadas por vírgula)",
            placeholder="coragem, superação, vitória"
        )
        with gr.Row():
            weight_emotion = gr.Slider(
                minimum=0, maximum=5, value=2.0, step=0.1, label="Peso: emoção"
            )
            weight_break = gr.Slider(
                minimum=0, maximum=5, value=1.5, step=0.1, label="Peso: quebra"
            )
        with gr.Row():
            weight_learn = gr.Slider(
                minimum=0, maximum=5, value=1.2, step=0.1, label="Peso: aprendizado"
            )
            weight_viral = gr.Slider(
                minimum=0, maximum=5, value=1.0, step=0.1, label="Peso: viral"
            )

    btn = gr.Button("Processar")

    with gr.Row():
        with gr.Column(scale=2):
            summary_out = gr.Textbox(label="Resumo dos cortes", lines=20, max_lines=30)
        with gr.Column(scale=1):
            status_out = gr.Textbox(label="Status", lines=3)
            file_out = gr.File(label="Download XML editado")

    btn.click(
        process_files,
        [xml_in, txt_in, use_llm, num_segments, custom_keywords,
         manual_timecodes, natural_instructions,
         weight_emotion, weight_break, weight_learn, weight_viral, fps_in],
        [summary_out, file_out, status_out]
    )

if __name__ == "__main__":
    demo.launch()