SeedVR2-3B

Running

App Files Community

Carlex22222 committed on Sep 9

Commit

d4386a4

verified ·

1 Parent(s): b9e2818

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -143

app.py CHANGED Viewed

@@ -1,180 +1,144 @@
-# app.py (Interface com Abas para SeedVR2 e VINCIE)
 import gradio as gr
 import os
-import uuid
-import shutil
 import subprocess
-import mimetypes
-# --- Diretórios Base ---
 SEEDVR_DIR = "/app/SeedVR"
 VINCIE_DIR = "/app/VINCIE"
-# --- Função de Logging em Tempo Real (Reutilizável) ---
-def stream_process_to_gradio(command, working_dir):
-    env = os.environ.copy()
-    env["PYTHONUNBUFFERED"] = "1"
-    log_output = f"Executando no diretório: {working_dir}\n"
-    log_output += f"Comando: {' '.join(command)}\n\n"
-    process = subprocess.Popen(
-        command,
-        cwd=working_dir,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        text=True,
-        encoding='utf-8',
-        env=env
-    )
-    # Loop para capturar e transmitir a saída
     while True:
         output = process.stdout.readline()
-        if output == '' and process.poll() is not None:
-            break
-        if output:
-            log_output += output
-            yield log_output
-    return_code = process.poll()
-    if return_code != 0:
-        raise gr.Error(f"A inferência falhou com o código {return_code}. Verifique os logs.")
-# --- Lógica de Inferência para SeedVR2 ---
-def run_seedvr2_inference(input_media, seed, res_h, res_w):
-    if input_media is None: raise gr.Error("Por favor, faça o upload de um arquivo para o SeedVR2.")
     job_id = str(uuid.uuid4())
-    input_dir = os.path.join("/app", "temp_inputs", job_id)
-    output_dir = os.path.join("/app", "temp_outputs", job_id)
-    os.makedirs(input_dir, exist_ok=True); os.makedirs(output_dir, exist_ok=True)
-    shutil.copy(input_media, input_dir)
     input_folder_relative = os.path.relpath(input_dir, SEEDVR_DIR)
     output_folder_relative = os.path.relpath(output_dir, SEEDVR_DIR)
-    command = [
-        "torchrun", "--nproc-per-node=4", "projects/inference_seedvr2_3b.py",
-        "--video_path", input_folder_relative, "--output_dir", output_folder_relative,
-        "--seed", str(seed), "--res_h", str(res_h), "--res_w", str(res_w),
-    ]
-    # Limpa as saídas antes de começar
-    yield None, None, "Iniciando inferência do SeedVR2..."
-    # Stream dos logs
-    for log_update in stream_process_to_gradio(command, SEEDVR_DIR):
-        yield None, None, log_update
-    # Encontra o arquivo de resultado
-    output_files = [f for f in os.listdir(output_dir) if f.endswith(('.mp4', '.png', '.jpg', '.jpeg'))]
-    if not output_files: raise gr.Error("Nenhum arquivo de saída do SeedVR2 foi encontrado.")
-    result_path = os.path.join(output_dir, output_files[0])
-    media_type, _ = mimetypes.guess_type(result_path)
-    if media_type and media_type.startswith("image"):
-        yield result_path, None, "Concluído!"
-    else:
-        yield None, result_path, "Concluído!"
-# --- Lógica de Inferência para VINCIE ---
-def run_vincie_inference(input_video, prompt, seed, steps, cfg_scale):
-    if input_video is None: raise gr.Error("Por favor, faça o upload de um vídeo para o VINCIE.")
     job_id = str(uuid.uuid4())
-    temp_dir = os.path.join("/app", "temp_vincie", job_id)
-    os.makedirs(temp_dir, exist_ok=True)
-    input_video_path = os.path.join(temp_dir, os.path.basename(input_video))
-    shutil.copy(input_video, input_video_path)
-    # O VINCIE salva a saída em um diretório com o mesmo nome do vídeo de entrada
-    output_path = os.path.join(temp_dir, "results")
-    command = [
-        "python", "inference.py",
-        "--config", "configs/vincie_1024.yaml",
-        "--pretrained_model_path", "./pretrained_models",
-        "--input_video", input_video_path,
-        "--output_folder", output_path,
-        "--prompt", str(prompt),
-        "--seed", str(seed),
-        "--num_inference_steps", str(int(steps)),
-        "--guidance_scale", str(cfg_scale),
-    ]
-    # Limpa a saída antes de começar
-    yield None, "Iniciando inferência do VINCIE..."
-    # Stream dos logs
-    for log_update in stream_process_to_gradio(command, VINCIE_DIR):
-        yield None, log_update
-    # Encontra o arquivo de resultado (VINCIE tem uma estrutura de saída aninhada)
-    result_files = list(os.walk(output_path))
-    found_file = None
-    for root, _, files in result_files:
-        for file in files:
-            if file.endswith('.mp4'):
-                found_file = os.path.join(root, file)
-                break
-        if found_file:
-            break
-    if not found_file: raise gr.Error("Nenhum arquivo de saída do VINCIE foi encontrado.")
-    yield found_file, "Concluído!"
-# --- Interface Gradio com Abas ---
 with gr.Blocks() as demo:
-    gr.Markdown("# 🚀 Interface de Inferência Unificada")
     with gr.Tabs():
-        with gr.TabItem("SeedVR2 (Restauração de Vídeo/Imagem)"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    seedvr2_input = gr.Video(label="Upload de Vídeo ou Imagem")
-                    seedvr2_seed = gr.Number(value=666, label="Seed")
-                    with gr.Accordion("Configurações Avançadas", open=False):
-                        seedvr2_h = gr.Number(value=720, label="Altura")
-                        seedvr2_w = gr.Number(value=1280, label="Largura")
-                    seedvr2_button = gr.Button("Executar SeedVR2", variant="primary")
                 with gr.Column(scale=2):
-                    seedvr2_img_out = gr.Image(label="Saída de Imagem")
-                    seedvr2_vid_out = gr.Video(label="Saída de Vídeo")
-                    seedvr2_logs = gr.Textbox(label="Logs em Tempo Real", lines=10, autoscroll=True)
-            seedvr2_button.click(
-                fn=run_seedvr2_inference,
-                inputs=[seedvr2_input, seedvr2_seed, seedvr2_h, seedvr2_w],
-                outputs=[seedvr2_img_out, seedvr2_vid_out, seedvr2_logs]
-            )
-            gr.Examples(
-                examples=[["./SeedVR/01.mp4"], ["./SeedVR/02.mp4"]],
-                inputs=[seedvr2_input]
-            )
-        with gr.TabItem("VINCIE (Edição de Vídeo com Prompt)"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    vincie_input = gr.Video(label="Upload de Vídeo")
-                    vincie_prompt = gr.Textbox(label="Prompt", lines=3, placeholder="Ex: a man wearing a spacesuit")
-                    vincie_seed = gr.Number(label="Seed", value=1234)
-                    vincie_steps = gr.Slider(label="Passos de Inferência", minimum=1, maximum=100, value=50, step=1)
-                    vincie_cfg = gr.Slider(label="Escala do CFG", minimum=1.0, maximum=15.0, value=7.5, step=0.5)
-                    vincie_button = gr.Button("Executar VINCIE", variant="primary")
                 with gr.Column(scale=2):
-                    vincie_vid_out = gr.Video(label="Saída de Vídeo")
-                    vincie_logs = gr.Textbox(label="Logs em Tempo Real", lines=10, autoscroll=True)
-            vincie_button.click(
-                fn=run_vincie_inference,
-                inputs=[vincie_input, vincie_prompt, vincie_seed, vincie_steps, vincie_cfg],
-                outputs=[vincie_vid_out, vincie_logs]
-            )
-demo.queue(max_size=10).launch()

+# app.py (Interface final com 3 abas: SeedVR, V-INT Edit, V-INT Pipeline)
 import gradio as gr
 import os
 import subprocess
+import shutil
+import uuid
+from huggingface_hub import snapshot_download
+import spaces
+# --- 1. CONFIGURAÇÃO E DOWNLOAD DOS MODELOS ---
 SEEDVR_DIR = "/app/SeedVR"
 VINCIE_DIR = "/app/VINCIE"
+HF_TOKEN = os.environ.get("HF_TOKEN")
+@spaces.GPU
+def download_models():
+    """Baixa os modelos para SeedVR e V-INT se eles não existirem."""
+    # Download do SeedVR
+    if not os.path.exists(os.path.join(SEEDVR_DIR, "ckpts", "seedvr2_ema_3b.pth")):
+        print("Baixando modelo do SeedVR-3B...")
+        snapshot_download(repo_id="ByteDance-Seed/SeedVR2-3B", local_dir=os.path.join(SEEDVR_DIR, "ckpts"), token=HF_TOKEN, local_dir_use_symlinks=False)
+    else: print("Modelo do SeedVR já existe.")
+    # Download do V-INT
+    if not os.path.exists(os.path.join(VINCIE_DIR, "ckpt", "VINCIE-3B")):
+        print("Baixando modelo do VINCIE-3B...")
+        snapshot_download(repo_id="ByteDance-Seed/VINCIE-3B", local_dir=os.path.join(VINCIE_DIR, "ckpt", "VINCIE-3B"), token=HF_TOKEN, local_dir_use_symlinks=False)
+    else: print("Modelo do VINCIE já existe.")
+    # Download de assets e configs do V-INT (para os exemplos)
+    if not os.path.exists(os.path.join(VINCIE_DIR, "assets", "woman_pineapple.png")):
+         print("Baixando assets e configs do V-INT...")
+         snapshot_download(repo_id="ByteDance-Seed/VINCIE", repo_type="space", local_dir=VINCIE_DIR, token=HF_TOKEN, allow_patterns=["assets/*", "configs/*"], local_dir_use_symlinks=False)
+    else: print("Assets e configs do V-INT já existem.")
+download_models()
+# --- 2. LÓGICA DE INFERÊNCIA ---
+def run_subprocess_with_logs(command, cwd):
+    """Função genérica para rodar um subprocesso e streamar os logs para o Gradio."""
+    log_output = f"Executando comando:\n{' '.join(command)}\n\n"
+    yield [], log_output
+    env = os.environ.copy(); env["PYTHONUNBUFFERED"] = "1"
+    process = subprocess.Popen(command, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, encoding='utf-8', env=env)
     while True:
         output = process.stdout.readline()
+        if output == '' and process.poll() is not None: break
+        if output: log_output += output; yield [], log_output
+    if process.poll() != 0: raise gr.Error("A inferência falhou. Verifique os logs.")
+def run_seedvr_inference(video_path, seed):
+    # (Lógica específica do SeedVR)
+    if video_path is None: raise gr.Error("Por favor, faça o upload de um arquivo para o SeedVR.")
     job_id = str(uuid.uuid4())
+    input_dir = os.path.join("/app", "temp_inputs", job_id); os.makedirs(input_dir, exist_ok=True)
+    output_dir = os.path.join("/app", "temp_outputs", job_id); os.makedirs(output_dir, exist_ok=True)
+    shutil.copy(video_path, input_dir)
     input_folder_relative = os.path.relpath(input_dir, SEEDVR_DIR)
     output_folder_relative = os.path.relpath(output_dir, SEEDVR_DIR)
+    command = ["torchrun", "--nproc-per-node=4", "projects/inference_seedvr2_3b.py", "--video_path", input_folder_relative, "--output_dir", output_folder_relative, "--seed", str(seed), "--res_h", "720", "--res_w", "1280"]
+    for gallery, logs in run_subprocess_with_logs(command, SEEDVR_DIR):
+        yield None, logs # Retorna None para o output enquanto os logs são atualizados
+    output_files = [os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(('.mp4', '.png'))]
+    if not output_files: raise gr.Error("Nenhum arquivo de saída encontrado.")
+    yield output_files[0], logs
+def run_vincie_edit_inference(files, prompts_str):
+    # (Lógica específica para o modo de EDIÇÃO do V-INT)
+    if not files: raise gr.Error("Por favor, faça o upload de uma imagem para editar.")
+    if not prompts_str: raise gr.Error("Por favor, forneça os prompts de edição.")
     job_id = str(uuid.uuid4())
+    output_dir = os.path.join(VINCIE_DIR, "output", job_id)
+    image_paths_str = ", ".join([f'"{f.name}"' for f in files])
+    prompts_list_str = ", ".join([f'"{p.strip()}"' for p in prompts_str.split(';')])
+    command = ["python", "main.py", "configs/generate.yaml", f'generation.positive_prompt.image_path=[{image_paths_str}]', f'generation.positive_prompt.prompts=[{prompts_list_str}]', f'generation.output.dir={output_dir}']
+    for gallery, logs in run_subprocess_with_logs(command, VINCIE_DIR):
+        yield gallery, logs
+    output_files = [os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(('.png', '.jpg'))]
+    if not output_files: raise gr.Error("Nenhum arquivo de saída encontrado.")
+    yield output_files, logs
+def run_vincie_pipeline_inference(files, final_prompt):
+    # (NOVA LÓGICA para o modo de PIPELINE do V-INT)
+    if not files: raise gr.Error("Por favor, faça o upload de múltiplas imagens para a composição.")
+    if not final_prompt: raise gr.Error("Por favor, forneça o prompt de composição final.")
+    job_id = str(uuid.uuid4())
+    output_dir = os.path.join(VINCIE_DIR, "output", job_id)
+    # Gera os prompts de placeholder
+    placeholder_prompts = [f"<IMG{i}>: " for i in range(len(files))]
+    all_prompts = placeholder_prompts + [final_prompt]
+    image_paths_str = ", ".join([f'"{f.name}"' for f in files])
+    prompts_list_str = ", ".join([f'"{p}"' for p in all_prompts])
+    command = ["python", "main.py", "configs/generate.yaml", "generation.pad_img_placehoder=False", f'generation.positive_prompt.image_path=[{image_paths_str}]', f'generation.positive_prompt.prompts=[{prompts_list_str}]', f'generation.output.dir={output_dir}']
+    for gallery, logs in run_subprocess_with_logs(command, VINCIE_DIR):
+        yield gallery, logs
+    output_files = [os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(('.png', '.jpg'))]
+    if not output_files: raise gr.Error("Nenhum arquivo de saída foi encontrado.")
+    yield output_files, logs
+# --- 3. INTERFACE GRADIO COM 3 ABAS ---
 with gr.Blocks() as demo:
+    gr.Markdown("<h1><center>Super-Space: SeedVR & V-INT</center></h1>")
     with gr.Tabs():
+        # --- ABA SEEDVR ---
+        with gr.TabItem("SeedVR (Restauração de Vídeo)"):
             with gr.Row():
                 with gr.Column(scale=1):
+                    seedvr_input_video = gr.Video(label="Upload de Vídeo")
+                    seedvr_seed = gr.Number(value=666, label="Seed")
+                    seedvr_run_button = gr.Button("Executar SeedVR", variant="primary")
                 with gr.Column(scale=2):
+                    seedvr_output = gr.Video(label="Vídeo Restaurado")
+                    seedvr_logs = gr.Textbox(label="Logs", lines=10, interactive=False)
+            seedvr_run_button.click(fn=run_seedvr_inference, inputs=[seedvr_input_video, seedvr_seed], outputs=[seedvr_output, seedvr_logs])
+        # --- ABA V-INT EDIÇÃO ---
+        with gr.TabItem("V-INT (Edição de Imagem)"):
             with gr.Row():
                 with gr.Column(scale=1):
+                    vincie_edit_input = gr.Files(label="Upload de Imagem(ns) para Editar", file_types=["image"])
+                    vincie_edit_prompts = gr.Textbox(label="Prompts de Edição (separados por ';')", lines=5, placeholder="Ex: Add a crown to her head; Change the background...")
+                    vincie_edit_button = gr.Button("Executar Edição", variant="primary")
                 with gr.Column(scale=2):
+                    vincie_edit_output