File size: 13,607 Bytes
d4386a4
2667dea
6bfaaa7
2667dea
 
d4386a4
 
 
 
 
 
2667dea
 
30f5979
d4386a4
2667dea
d4386a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eecde3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d4386a4
 
 
 
30f5979
d4386a4
 
30f5979
 
 
d4386a4
 
30f5979
d4386a4
30f5979
d4386a4
 
 
6bfaaa7
d4386a4
 
 
30f5979
 
 
d4386a4
30f5979
d4386a4
 
30f5979
d4386a4
 
 
2667dea
d4386a4
 
 
 
6bfaaa7
30f5979
d4386a4
30f5979
d4386a4
 
 
30f5979
d4386a4
 
30f5979
d4386a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30f5979
d4386a4
 
 
 
 
 
 
 
 
 
 
 
30f5979
 
d4386a4
30f5979
 
d4386a4
 
30f5979
 
d4386a4
 
 
30f5979
d4386a4
 
 
 
 
 
30f5979
 
d4386a4
 
 
30f5979
eecde3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# app.py (Interface final com 3 abas: SeedVR, V-INT Edit, V-INT Pipeline)

import gradio as gr
import os
import subprocess
import shutil
import uuid
from huggingface_hub import snapshot_download
import spaces

# --- 1. CONFIGURATION AND MODEL DOWNLOAD ---

# Base directory for SeedVR: checkpoints are stored under its "ckpts" folder and
# it is used as the working directory of the SeedVR inference subprocess.
SEEDVR_DIR = "/app/SeedVR"
# Base directory for VINCIE: holds the "ckpt" and "assets" folders and the
# per-job "output" folders; also the working directory of the VINCIE subprocess.
VINCIE_DIR = "/app/VINCIE"
# Hugging Face access token read from the environment; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

@spaces.GPU
def download_models():
    """Fetch the SeedVR and VINCIE files from the Hugging Face Hub when absent.

    Three independent checks are made, each keyed on one marker path below
    ``SEEDVR_DIR`` / ``VINCIE_DIR``. A download is started only when its marker
    is missing; otherwise a message is printed. ``HF_TOKEN`` is forwarded to
    every download.
    """
    # NOTE(review): the function is GPU-decorated although the visible body only
    # touches the filesystem and the Hub -- confirm the decorator is intended.

    # SeedVR checkpoint.
    seedvr_ckpt_dir = os.path.join(SEEDVR_DIR, "ckpts")
    if os.path.exists(os.path.join(seedvr_ckpt_dir, "seedvr2_ema_3b.pth")):
        print("Modelo do SeedVR já existe.")
    else:
        print("Baixando modelo do SeedVR-3B...")
        snapshot_download(
            repo_id="ByteDance-Seed/SeedVR2-3B",
            local_dir=seedvr_ckpt_dir,
            token=HF_TOKEN,
            local_dir_use_symlinks=False,
        )

    # VINCIE checkpoint (the directory itself is the marker).
    vincie_ckpt_dir = os.path.join(VINCIE_DIR, "ckpt", "VINCIE-3B")
    if os.path.exists(vincie_ckpt_dir):
        print("Modelo do VINCIE já existe.")
    else:
        print("Baixando modelo do VINCIE-3B...")
        snapshot_download(
            repo_id="ByteDance-Seed/VINCIE-3B",
            local_dir=vincie_ckpt_dir,
            token=HF_TOKEN,
            local_dir_use_symlinks=False,
        )

    # VINCIE assets and configs, used by the UI examples.
    example_asset = os.path.join(VINCIE_DIR, "assets", "woman_pineapple.png")
    if os.path.exists(example_asset):
        print("Assets e configs do V-INT já existem.")
    else:
        print("Baixando assets e configs do V-INT...")
        snapshot_download(
            repo_id="ByteDance-Seed/VINCIE",
            repo_type="space",
            local_dir=VINCIE_DIR,
            token=HF_TOKEN,
            allow_patterns=["assets/*", "configs/*"],
            local_dir_use_symlinks=False,
        )

# Executed at import time, before the UI below is built, so the presence checks
# (and any downloads) happen on startup.
download_models()

# --- 2. INFERENCE LOGIC ---

def run_subprocess_with_logs(command, cwd):
    """Run *command* in *cwd* and stream its combined stdout/stderr to Gradio.

    This is a generator. Every item is a ``([], log_text)`` pair: the empty
    list is a placeholder for the output component and ``log_text`` is all the
    text captured so far, starting with a banner that shows the command line.

    Args:
        command: Argument list handed to ``subprocess.Popen`` (no shell).
        cwd: Working directory of the child process.

    Yields:
        ``([], log_text)`` once before the process starts, then once per
        line of output.

    Raises:
        gr.Error: If the child exits with a non-zero status.
    """
    log_output = f"Executando comando:\n{' '.join(command)}\n\n"
    yield [], log_output

    env = os.environ.copy()
    env["PYTHONUNBUFFERED"] = "1"  # make Python children flush line by line

    # The context manager closes the pipe and reaps the child on every exit
    # path. errors="replace" keeps the stream alive when the child prints bytes
    # that are not valid UTF-8.
    with subprocess.Popen(
        command,
        cwd=cwd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        encoding="utf-8",
        errors="replace",
        env=env,
    ) as process:
        try:
            for line in process.stdout:
                log_output += line
                yield [], log_output
            # stdout hit EOF; wait for the exit status like the original loop did.
            process.wait()
        finally:
            # Reached with a live child only when the consumer abandoned the
            # generator or an error escaped: do not leave the job running.
            if process.poll() is None:
                process.kill()

    if process.returncode != 0:
        raise gr.Error("A inferência falhou. Verifique os logs.")

def run_seedvr_inference(video_path, seed):
    """Run SeedVR on an uploaded file and stream the logs to the UI.

    This is a generator producing ``(output, logs)`` pairs. While the
    subprocess is running ``output`` is ``None``; the final item carries the
    path of the first (alphabetically) ``.mp4``/``.png`` file found in the
    job's output directory.

    Args:
        video_path: Path of the uploaded file, or ``None`` when nothing was uploaded.
        seed: Seed value from the UI; converted to an integer before being
            passed on the command line.

    Raises:
        gr.Error: If no file was uploaded, the seed is not a usable number, the
            subprocess fails, or no output file is produced.
    """
    if video_path is None:
        raise gr.Error("Por favor, faça o upload de um arquivo para o SeedVR.")
    # A numeric widget may hand over a float (e.g. 666.0) or None; the command
    # line gets a plain integer either way.
    try:
        seed_arg = str(int(seed))
    except (TypeError, ValueError, OverflowError):
        raise gr.Error("Por favor, forneça um seed numérico válido.") from None

    # Each job gets its own input/output folders so concurrent runs do not mix files.
    job_id = str(uuid.uuid4())
    input_dir = os.path.join("/app", "temp_inputs", job_id)
    output_dir = os.path.join("/app", "temp_outputs", job_id)
    os.makedirs(input_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)
    shutil.copy(video_path, input_dir)

    # The subprocess runs with SEEDVR_DIR as cwd, so pass paths relative to it.
    command = [
        "torchrun", "--nproc-per-node=4", "projects/inference_seedvr2_3b.py",
        "--video_path", os.path.relpath(input_dir, SEEDVR_DIR),
        "--output_dir", os.path.relpath(output_dir, SEEDVR_DIR),
        "--seed", seed_arg,
        "--res_h", "720", "--res_w", "1280",
    ]

    logs = ""
    try:
        for _, logs in run_subprocess_with_logs(command, SEEDVR_DIR):
            yield None, logs  # no output yet; only the log box is refreshed
    finally:
        # The input copy is only needed while the subprocess runs.
        shutil.rmtree(input_dir, ignore_errors=True)

    # Sorted so the selected file does not depend on directory listing order.
    output_files = sorted(
        os.path.join(output_dir, name)
        for name in os.listdir(output_dir)
        if name.endswith(('.mp4', '.png'))
    )
    if not output_files:
        raise gr.Error("Nenhum arquivo de saída encontrado.")
    yield output_files[0], logs

def run_vincie_edit_inference(files, prompts_str):
    # (Lógica específica para o modo de EDIÇÃO do V-INT)
    if not files: raise gr.Error("Por favor, faça o upload de uma imagem para editar.")
    if not prompts_str: raise gr.Error("Por favor, forneça os prompts de edição.")

# app.py (Interface final com 3 abas: SeedVR, V-INT Edit, V-INT Pipeline)

import gradio as gr
import os
import subprocess
import shutil
import uuid
from huggingface_hub import snapshot_download
import spaces

# --- 1. CONFIGURATION AND MODEL DOWNLOAD ---

# Base directory for SeedVR: checkpoints are stored under its "ckpts" folder and
# it is used as the working directory of the SeedVR inference subprocess.
SEEDVR_DIR = "/app/SeedVR"
# Base directory for VINCIE: holds the "ckpt" and "assets" folders and the
# per-job "output" folders; also the working directory of the VINCIE subprocess.
VINCIE_DIR = "/app/VINCIE"
# Hugging Face access token read from the environment; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

@spaces.GPU
def download_models():
    """Fetch the SeedVR and VINCIE files from the Hugging Face Hub when absent.

    Three independent checks are made, each keyed on one marker path below
    ``SEEDVR_DIR`` / ``VINCIE_DIR``. A download is started only when its marker
    is missing; otherwise a message is printed. ``HF_TOKEN`` is forwarded to
    every download.
    """
    # NOTE(review): the function is GPU-decorated although the visible body only
    # touches the filesystem and the Hub -- confirm the decorator is intended.

    # SeedVR checkpoint.
    seedvr_ckpt_dir = os.path.join(SEEDVR_DIR, "ckpts")
    if os.path.exists(os.path.join(seedvr_ckpt_dir, "seedvr2_ema_3b.pth")):
        print("Modelo do SeedVR já existe.")
    else:
        print("Baixando modelo do SeedVR-3B...")
        snapshot_download(
            repo_id="ByteDance-Seed/SeedVR2-3B",
            local_dir=seedvr_ckpt_dir,
            token=HF_TOKEN,
            local_dir_use_symlinks=False,
        )

    # VINCIE checkpoint (the directory itself is the marker).
    vincie_ckpt_dir = os.path.join(VINCIE_DIR, "ckpt", "VINCIE-3B")
    if os.path.exists(vincie_ckpt_dir):
        print("Modelo do VINCIE já existe.")
    else:
        print("Baixando modelo do VINCIE-3B...")
        snapshot_download(
            repo_id="ByteDance-Seed/VINCIE-3B",
            local_dir=vincie_ckpt_dir,
            token=HF_TOKEN,
            local_dir_use_symlinks=False,
        )

    # VINCIE assets and configs, used by the UI examples.
    example_asset = os.path.join(VINCIE_DIR, "assets", "woman_pineapple.png")
    if os.path.exists(example_asset):
        print("Assets e configs do V-INT já existem.")
    else:
        print("Baixando assets e configs do V-INT...")
        snapshot_download(
            repo_id="ByteDance-Seed/VINCIE",
            repo_type="space",
            local_dir=VINCIE_DIR,
            token=HF_TOKEN,
            allow_patterns=["assets/*", "configs/*"],
            local_dir_use_symlinks=False,
        )

# Executed at import time, before the UI below is built, so the presence checks
# (and any downloads) happen on startup.
download_models()

# --- 2. INFERENCE LOGIC ---

def run_subprocess_with_logs(command, cwd):
    """Run *command* in *cwd* and stream its combined stdout/stderr to Gradio.

    This is a generator. Every item is a ``([], log_text)`` pair: the empty
    list is a placeholder for the output component and ``log_text`` is all the
    text captured so far, starting with a banner that shows the command line.

    Args:
        command: Argument list handed to ``subprocess.Popen`` (no shell).
        cwd: Working directory of the child process.

    Yields:
        ``([], log_text)`` once before the process starts, then once per
        line of output.

    Raises:
        gr.Error: If the child exits with a non-zero status.
    """
    log_output = f"Executando comando:\n{' '.join(command)}\n\n"
    yield [], log_output

    env = os.environ.copy()
    env["PYTHONUNBUFFERED"] = "1"  # make Python children flush line by line

    # The context manager closes the pipe and reaps the child on every exit
    # path. errors="replace" keeps the stream alive when the child prints bytes
    # that are not valid UTF-8.
    with subprocess.Popen(
        command,
        cwd=cwd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        encoding="utf-8",
        errors="replace",
        env=env,
    ) as process:
        try:
            for line in process.stdout:
                log_output += line
                yield [], log_output
            # stdout hit EOF; wait for the exit status like the original loop did.
            process.wait()
        finally:
            # Reached with a live child only when the consumer abandoned the
            # generator or an error escaped: do not leave the job running.
            if process.poll() is None:
                process.kill()

    if process.returncode != 0:
        raise gr.Error("A inferência falhou. Verifique os logs.")

def run_seedvr_inference(video_path, seed):
    """Run SeedVR on an uploaded file and stream the logs to the UI.

    This is a generator producing ``(output, logs)`` pairs. While the
    subprocess is running ``output`` is ``None``; the final item carries the
    path of the first (alphabetically) ``.mp4``/``.png`` file found in the
    job's output directory.

    Args:
        video_path: Path of the uploaded file, or ``None`` when nothing was uploaded.
        seed: Seed value from the UI; converted to an integer before being
            passed on the command line.

    Raises:
        gr.Error: If no file was uploaded, the seed is not a usable number, the
            subprocess fails, or no output file is produced.
    """
    if video_path is None:
        raise gr.Error("Por favor, faça o upload de um arquivo para o SeedVR.")
    # A numeric widget may hand over a float (e.g. 666.0) or None; the command
    # line gets a plain integer either way.
    try:
        seed_arg = str(int(seed))
    except (TypeError, ValueError, OverflowError):
        raise gr.Error("Por favor, forneça um seed numérico válido.") from None

    # Each job gets its own input/output folders so concurrent runs do not mix files.
    job_id = str(uuid.uuid4())
    input_dir = os.path.join("/app", "temp_inputs", job_id)
    output_dir = os.path.join("/app", "temp_outputs", job_id)
    os.makedirs(input_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)
    shutil.copy(video_path, input_dir)

    # The subprocess runs with SEEDVR_DIR as cwd, so pass paths relative to it.
    command = [
        "torchrun", "--nproc-per-node=4", "projects/inference_seedvr2_3b.py",
        "--video_path", os.path.relpath(input_dir, SEEDVR_DIR),
        "--output_dir", os.path.relpath(output_dir, SEEDVR_DIR),
        "--seed", seed_arg,
        "--res_h", "720", "--res_w", "1280",
    ]

    logs = ""
    try:
        for _, logs in run_subprocess_with_logs(command, SEEDVR_DIR):
            yield None, logs  # no output yet; only the log box is refreshed
    finally:
        # The input copy is only needed while the subprocess runs.
        shutil.rmtree(input_dir, ignore_errors=True)

    # Sorted so the selected file does not depend on directory listing order.
    output_files = sorted(
        os.path.join(output_dir, name)
        for name in os.listdir(output_dir)
        if name.endswith(('.mp4', '.png'))
    )
    if not output_files:
        raise gr.Error("Nenhum arquivo de saída encontrado.")
    yield output_files[0], logs

def run_vincie_edit_inference(files, prompts_str):
    """Run VINCIE in edit mode and stream the logs to the UI.

    This is a generator producing ``(gallery, logs)`` pairs. While the
    subprocess runs, ``gallery`` is the empty placeholder list; the final item
    carries the sorted list of ``.png``/``.jpg`` files found in the job's
    output directory.

    Args:
        files: Uploaded images, either objects exposing a ``name`` attribute
            with the file path or plain path strings.
        prompts_str: Edit prompts separated by ``;``. Surrounding whitespace is
            stripped and empty entries (e.g. after a trailing ``;``) are dropped.

    Raises:
        gr.Error: If no image or no usable prompt was given, the subprocess
            fails, or no output file is produced.
    """
    if not files:
        raise gr.Error("Por favor, faça o upload de uma imagem para editar.")
    if not prompts_str:
        raise gr.Error("Por favor, forneça os prompts de edição.")

    # Drop empty entries so a trailing ';' does not become an empty prompt.
    prompts = [p.strip() for p in prompts_str.split(';')]
    prompts = [p for p in prompts if p]
    if not prompts:
        raise gr.Error("Por favor, forneça os prompts de edição.")

    job_id = str(uuid.uuid4())
    output_dir = os.path.join(VINCIE_DIR, "output", job_id)

    # Accept both upload wrappers (path in .name) and plain path strings.
    image_paths = [getattr(f, "name", f) for f in files]
    image_paths_str = ", ".join(f'"{path}"' for path in image_paths)
    prompts_list_str = ", ".join(f'"{p}"' for p in prompts)
    command = [
        "python", "main.py", "configs/generate.yaml",
        f'generation.positive_prompt.image_path=[{image_paths_str}]',
        f'generation.positive_prompt.prompts=[{prompts_list_str}]',
        f'generation.output.dir={output_dir}',
    ]

    logs = ""
    for placeholder, logs in run_subprocess_with_logs(command, VINCIE_DIR):
        yield placeholder, logs

    # A missing directory is reported the same way as an empty one; results are
    # sorted so the gallery order does not depend on directory listing order.
    produced = os.listdir(output_dir) if os.path.isdir(output_dir) else []
    output_files = sorted(
        os.path.join(output_dir, name)
        for name in produced
        if name.endswith(('.png', '.jpg'))
    )
    if not output_files:
        raise gr.Error("Nenhum arquivo de saída encontrado.")
    yield output_files, logs

def run_vincie_pipeline_inference(files, final_prompt):
    """Run VINCIE in pipeline/composition mode and stream the logs to the UI.

    One placeholder prompt ``"<IMGi>: "`` is generated per uploaded image and
    ``final_prompt`` is appended as the last prompt. This is a generator
    producing ``(gallery, logs)`` pairs; while the subprocess runs, ``gallery``
    is the empty placeholder list, and the final item carries the sorted list
    of ``.png``/``.jpg`` files found in the job's output directory.

    Args:
        files: Uploaded images, either objects exposing a ``name`` attribute
            with the file path or plain path strings.
        final_prompt: Composition prompt appended after the placeholders.

    Raises:
        gr.Error: If no image or no prompt was given, the subprocess fails, or
            no output file is produced.
    """
    if not files:
        raise gr.Error("Por favor, faça o upload de múltiplas imagens para a composição.")
    if not final_prompt or not final_prompt.strip():
        raise gr.Error("Por favor, forneça o prompt de composição final.")

    job_id = str(uuid.uuid4())
    output_dir = os.path.join(VINCIE_DIR, "output", job_id)

    # One placeholder prompt per input image, followed by the composition prompt.
    all_prompts = [f"<IMG{i}>: " for i in range(len(files))] + [final_prompt]

    # Accept both upload wrappers (path in .name) and plain path strings.
    image_paths = [getattr(f, "name", f) for f in files]
    image_paths_str = ", ".join(f'"{path}"' for path in image_paths)
    prompts_list_str = ", ".join(f'"{p}"' for p in all_prompts)
    command = [
        "python", "main.py", "configs/generate.yaml",
        "generation.pad_img_placehoder=False",
        f'generation.positive_prompt.image_path=[{image_paths_str}]',
        f'generation.positive_prompt.prompts=[{prompts_list_str}]',
        f'generation.output.dir={output_dir}',
    ]

    logs = ""
    for placeholder, logs in run_subprocess_with_logs(command, VINCIE_DIR):
        yield placeholder, logs

    # A missing directory is reported the same way as an empty one; results are
    # sorted so the gallery order does not depend on directory listing order.
    produced = os.listdir(output_dir) if os.path.isdir(output_dir) else []
    output_files = sorted(
        os.path.join(output_dir, name)
        for name in produced
        if name.endswith(('.png', '.jpg'))
    )
    if not output_files:
        raise gr.Error("Nenhum arquivo de saída foi encontrado.")
    yield output_files, logs

# --- 3. GRADIO INTERFACE WITH 3 TABS ---

# Builds the UI: one tab per workflow, each with inputs on the left, the result
# and a log box on the right, and a button wired to the matching generator
# function defined above.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>Super-Space: SeedVR & V-INT</center></h1>")

    with gr.Tabs():
        # --- SEEDVR TAB ---
        with gr.TabItem("SeedVR (Restauração de Vídeo)"):
            with gr.Row():
                with gr.Column(scale=1):
                    seedvr_input_video = gr.Video(label="Upload de Vídeo")
                    seedvr_seed = gr.Number(value=666, label="Seed")
                    seedvr_run_button = gr.Button("Executar SeedVR", variant="primary")
                with gr.Column(scale=2):
                    seedvr_output = gr.Video(label="Vídeo Restaurado")
                    seedvr_logs = gr.Textbox(label="Logs", lines=10, interactive=False)
            # The handler yields (video, logs) pairs, so both outputs are updated per yield.
            seedvr_run_button.click(fn=run_seedvr_inference, inputs=[seedvr_input_video, seedvr_seed], outputs=[seedvr_output, seedvr_logs])

        # --- V-INT EDIT TAB ---
        with gr.TabItem("V-INT (Edição de Imagem)"):
            with gr.Row():
                with gr.Column(scale=1):
                    vincie_edit_input = gr.Files(label="Upload de Imagem(ns) para Editar", file_types=["image"])
                    vincie_edit_prompts = gr.Textbox(label="Prompts de Edição (separados por ';')", lines=5, placeholder="Ex: Add a crown to her head; Change the background...")
                    vincie_edit_button = gr.Button("Executar Edição", variant="primary")
                with gr.Column(scale=2):
                    vincie_edit_output = gr.Gallery(label="Imagens Editadas")
                    vincie_edit_logs = gr.Textbox(label="Logs", lines=10, interactive=False)
            # Example row: one image under VINCIE_DIR/assets plus two ';'-separated prompts.
            gr.Examples(examples=[[[os.path.join(VINCIE_DIR, "assets/woman_pineapple.png")], "Lower the pineapple beside her face; Add a crown to the woman's head."]], inputs=[vincie_edit_input, vincie_edit_prompts])
            vincie_edit_button.click(fn=run_vincie_edit_inference, inputs=[vincie_edit_input, vincie_edit_prompts], outputs=[vincie_edit_output, vincie_edit_logs])

        # --- NEW V-INT PIPELINE TAB ---
        with gr.TabItem("V-INT (Pipeline/Composição)"):
            with gr.Row():
                with gr.Column(scale=1):
                    vincie_pipe_inputs = gr.Files(label="Upload de Múltiplas Imagens para Composição (<IMG0>, <IMG1>, etc.)", file_types=["image"])
                    vincie_pipe_prompt = gr.Textbox(label="Prompt de Composição Final", lines=5, placeholder="Ex: Based on <IMG0> and <IMG1>, a woman in <IMG0> is holding the cat from <IMG1>. Output <IMG2>:")
                    vincie_pipe_button = gr.Button("Executar Pipeline", variant="primary")
                with gr.Column(scale=2):
                    vincie_pipe_output = gr.Gallery(label="Imagem Composta")
                    vincie_pipe_logs = gr.Textbox(label="Logs", lines=10, interactive=False)

            # Build the paths of the example files (three images under VINCIE_DIR/assets).
            father_path = os.path.join(VINCIE_DIR, "assets/father.png")
            mother_path = os.path.join(VINCIE_DIR, "assets/mother.png")
            son_path = os.path.join(VINCIE_DIR, "assets/son.png")
            family_prompt = "Based on <IMG0>, <IMG1>, and <IMG2>, A smiling family with the father from <IMG0>, mother from <IMG1>, and son from <IMG2>, poses for a portrait amidst the sunlit trees. Output <IMG3>:"
            gr.Examples(examples=[[[father_path, mother_path, son_path], family_prompt]], inputs=[vincie_pipe_inputs, vincie_pipe_prompt])

            vincie_pipe_button.click(fn=run_vincie_pipeline_inference, inputs=[vincie_pipe_inputs, vincie_pipe_prompt], outputs=[vincie_pipe_output, vincie_pipe_logs])

# Enable the request queue and start the app.
demo.queue().launch()