SeedVR2-3B

Paused

App Files Files Community

Aduc-sdr committed on Sep 5, 2025

Commit

397e9af

verified ·

1 Parent(s): 9e3a7d0

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -32

app.py CHANGED Viewed

@@ -2,7 +2,7 @@
 # //
 # // Licensed under the Apache License, Version 2.0 (the "License");
 # // you may not use this file except in compliance with the License.
-# // You may not obtain a copy of the License at
 # //
 # //     http://www.apache.org/licenses/LICENSE-2.0
 # //
@@ -11,22 +11,12 @@
 # // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # // See the License for the specific language governing permissions and
 # // limitations under the License.
-import torch.distributed as dist
 import os
-import gc
-import logging
 import sys
-import subprocess
-from pathlib import Path
-from urllib.parse import urlparse
-from torch.hub import download_url_to_file
-import gradio as gr
-import mediapy
-from einops import rearrange
-import shutil
-from omegaconf import OmegaConf
-# --- ETAPA 1: Clonar o Repositório Oficial do GitHub ---
 repo_name = "SeedVR"
 if not os.path.exists(repo_name):
     print(f"Clonando o repositório {repo_name} do GitHub...")
@@ -36,14 +26,22 @@ if not os.path.exists(repo_name):
 os.chdir(repo_name)
 print(f"Diretório de trabalho alterado para: {os.getcwd()}")
-# Adicionar o diretório ao path do Python para que as importações funcionem
 sys.path.insert(0, os.path.abspath('.'))
 print(f"Diretório atual adicionado ao sys.path.")
-# --- ETAPA 3: Instalar Dependências Conforme as Instruções ---
 python_executable = sys.executable
-print("Instalando dependências do requirements.txt...")
-subprocess.run([python_executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)
 print("Instalando flash-attn...")
 subprocess.run([python_executable, "-m", "pip", "install", "flash-attn==2.5.9.post1", "--no-build-isolation"], check=True)
@@ -52,7 +50,6 @@ from pathlib import Path
 from urllib.parse import urlparse
 from torch.hub import download_url_to_file, get_dir
-# Função auxiliar para downloads
 def load_file_from_url(url, model_dir='.', progress=True, file_name=None):
     os.makedirs(model_dir, exist_ok=True)
     if not file_name:
@@ -64,7 +61,6 @@ def load_file_from_url(url, model_dir='.', progress=True, file_name=None):
         download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
     return cached_file
-# Baixar e instalar Apex pré-compilado (crucial para o ambiente do Spaces)
 apex_url = 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl'
 apex_wheel_path = load_file_from_url(url=apex_url)
 print("Instalando Apex a partir do wheel baixado...")
@@ -73,6 +69,8 @@ print("✅ Configuração do Apex concluída.")
 # --- ETAPA 4: Baixar os Modelos Pré-treinados ---
 print("Baixando modelos pré-treinados...")
 pretrain_model_url = {
     'vae': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth',
     'dit': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth',
@@ -85,8 +83,8 @@ for key, url in pretrain_model_url.items():
     model_dir = './ckpts' if key in ['vae', 'dit'] else '.'
     load_file_from_url(url=url, model_dir=model_dir)
 # --- ETAPA 5: Executar a Aplicação Principal ---
-import torch
 import mediapy
 from einops import rearrange
 from omegaconf import OmegaConf
@@ -112,16 +110,20 @@ from common.partition import partition_by_size
 from projects.video_diffusion_sr.infer import VideoDiffusionInfer
 from common.distributed.ops import sync_data
 os.environ["MASTER_ADDR"] = "127.0.0.1"
 os.environ["MASTER_PORT"] = "12355"
 os.environ["RANK"] = str(0)
 os.environ["WORLD_SIZE"] = str(1)
-if os.path.exists("projects/video_diffusion_sr/color_fix.py"):
-    from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
-    use_colorfix = True
-else:
-    use_colorfix = False
 def configure_runner():
     config = load_config('configs_3b/main.yaml')
@@ -136,10 +138,9 @@ def configure_runner():
 def generation_step(runner, text_embeds_dict, cond_latents):
     def _move_to_cuda(x): return [i.to("cuda") for i in x]
-    noises = [torch.randn_like(latent) for latent in cond_latents]
-    aug_noises = [torch.randn_like(latent) for latent in cond_latents]
     noises, aug_noises, cond_latents = sync_data((noises, aug_noises, cond_latents), 0)
-    noises, aug_noises, cond_latents = list(map(_move_to_cuda, (noises, aug_noises, cond_latents)))
     def _add_noise(x, aug_noise):
         t = torch.tensor([100.0], device="cuda")
         shape = torch.tensor(x.shape[1:], device="cuda")[None]
@@ -158,9 +159,10 @@ def generation_loop(video_path, seed=666, fps_out=24):
     runner.configure_diffusion()
     set_seed(int(seed))
     os.makedirs("output", exist_ok=True)
-    video_transform = Compose([NaResize(1024), DivisibleCrop(16), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w")])
     media_type, _ = mimetypes.guess_type(video_path)
     is_video = media_type and media_type.startswith("video")
     if is_video:
         video, _, _ = read_video(video_path, output_format="TCHW")
         video = video[:121] / 255.0
@@ -168,12 +170,14 @@ def generation_loop(video_path, seed=666, fps_out=24):
     else:
         video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0)
         output_path = os.path.join("output", f"{uuid.uuid4()}.png")
-    cond_latents = [video_transform(video.to("cuda"))]
     ori_length = cond_latents[0].size(2)
     cond_latents = runner.vae_encode(cond_latents)
     samples = generation_step(runner, text_embeds, cond_latents)
     sample = samples[0][:ori_length].cpu()
     sample = rearrange(sample, "t c h w -> t h w c").clip(-1, 1).add(1).mul(127.5).byte().numpy()
     if is_video:
         mediapy.write_video(output_path, sample, fps=fps_out)
         return None, output_path, output_path
@@ -182,7 +186,16 @@ def generation_loop(video_path, seed=666, fps_out=24):
         return output_path, None, output_path
 with gr.Blocks(title="SeedVR") as demo:
-    gr.HTML(f"""<div style='text-align:center; margin-bottom: 10px;'><img src='file/{os.path.abspath("assets/seedvr_logo.png")}' style='height:40px;'/></div>...""")
     with gr.Row():
         input_file = gr.File(label="Carregar Imagem ou Vídeo")
         with gr.Column():
@@ -193,5 +206,11 @@ with gr.Blocks(title="SeedVR") as demo:
     output_video = gr.Video(label="Vídeo de Saída")
     download_link = gr.File(label="Baixar Resultado")
     run_button.click(fn=generation_loop, inputs=[input_file, seed, fps], outputs=[output_image, output_video, download_link])
 demo.queue().launch(share=True)

 # //
 # // Licensed under the Apache License, Version 2.0 (the "License");
 # // you may not use this file except in compliance with the License.
+# // You may obtain a copy of the License at
 # //
 # //     http://www.apache.org/licenses/LICENSE-2.0
 # //
 # // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # // See the License for the specific language governing permissions and
 # // limitations under the License.
+import spaces
+import subprocess
 import os
 import sys
+# --- ETAPA 1: Clonar o Repositório do GitHub ---
 repo_name = "SeedVR"
 if not os.path.exists(repo_name):
     print(f"Clonando o repositório {repo_name} do GitHub...")
 os.chdir(repo_name)
 print(f"Diretório de trabalho alterado para: {os.getcwd()}")
 sys.path.insert(0, os.path.abspath('.'))
 print(f"Diretório atual adicionado ao sys.path.")
+# --- ETAPA 3: Instalar Dependências Corretamente ---
 python_executable = sys.executable
+# CORREÇÃO CRÍTICA: Filtrar requirements.txt para evitar conflitos com torch/torchvision
+print("Filtrando requirements.txt para evitar conflitos de versão...")
+with open("requirements.txt", "r") as f_in, open("filtered_requirements.txt", "w") as f_out:
+    for line in f_in:
+        # Ignora as linhas que podem causar conflitos
+        if not line.strip().startswith(('torch', 'torchvision')):
+            f_out.write(line)
+print("Instalando dependências filtradas...")
+subprocess.run([python_executable, "-m", "pip", "install", "-r", "filtered_requirements.txt"], check=True)
 print("Instalando flash-attn...")
 subprocess.run([python_executable, "-m", "pip", "install", "flash-attn==2.5.9.post1", "--no-build-isolation"], check=True)
 from urllib.parse import urlparse
 from torch.hub import download_url_to_file, get_dir
 def load_file_from_url(url, model_dir='.', progress=True, file_name=None):
     os.makedirs(model_dir, exist_ok=True)
     if not file_name:
         download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
     return cached_file
 apex_url = 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl'
 apex_wheel_path = load_file_from_url(url=apex_url)
 print("Instalando Apex a partir do wheel baixado...")
 # --- ETAPA 4: Baixar os Modelos Pré-treinados ---
 print("Baixando modelos pré-treinados...")
+import torch
 pretrain_model_url = {
     'vae': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth',
     'dit': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth',
     model_dir = './ckpts' if key in ['vae', 'dit'] else '.'
     load_file_from_url(url=url, model_dir=model_dir)
 # --- ETAPA 5: Executar a Aplicação Principal ---
 import mediapy
 from einops import rearrange
 from omegaconf import OmegaConf
 from projects.video_diffusion_sr.infer import VideoDiffusionInfer
 from common.distributed.ops import sync_data
+torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/23_1_lq.mp4', '01.mp4')
+torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/28_1_lq.mp4', '02.mp4')
+torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/2_1_lq.mp4', '03.mp4')
+print("✅ Setup completo. Iniciando a aplicação...")
 os.environ["MASTER_ADDR"] = "127.0.0.1"
 os.environ["MASTER_PORT"] = "12355"
 os.environ["RANK"] = str(0)
 os.environ["WORLD_SIZE"] = str(1)
+use_colorfix = os.path.exists("projects/video_diffusion_sr/color_fix.py")
 def configure_runner():
     config = load_config('configs_3b/main.yaml')
 def generation_step(runner, text_embeds_dict, cond_latents):
     def _move_to_cuda(x): return [i.to("cuda") for i in x]
+    noises, aug_noises = [torch.randn_like(l) for l in cond_latents], [torch.randn_like(l) for l in cond_latents]
     noises, aug_noises, cond_latents = sync_data((noises, aug_noises, cond_latents), 0)
+    noises, aug_noises, cond_latents = map(_move_to_cuda, (noises, aug_noises, cond_latents))
     def _add_noise(x, aug_noise):
         t = torch.tensor([100.0], device="cuda")
         shape = torch.tensor(x.shape[1:], device="cuda")[None]
     runner.configure_diffusion()
     set_seed(int(seed))
     os.makedirs("output", exist_ok=True)
+    transform = Compose([NaResize(1024), DivisibleCrop(16), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w")])
     media_type, _ = mimetypes.guess_type(video_path)
     is_video = media_type and media_type.startswith("video")
     if is_video:
         video, _, _ = read_video(video_path, output_format="TCHW")
         video = video[:121] / 255.0
     else:
         video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0)
         output_path = os.path.join("output", f"{uuid.uuid4()}.png")
+    cond_latents = [transform(video.to("cuda"))]
     ori_length = cond_latents[0].size(2)
     cond_latents = runner.vae_encode(cond_latents)
     samples = generation_step(runner, text_embeds, cond_latents)
     sample = samples[0][:ori_length].cpu()
     sample = rearrange(sample, "t c h w -> t h w c").clip(-1, 1).add(1).mul(127.5).byte().numpy()
     if is_video:
         mediapy.write_video(output_path, sample, fps=fps_out)
         return None, output_path, output_path
         return output_path, None, output_path
 with gr.Blocks(title="SeedVR") as demo:
+    gr.HTML(f"""
+        <div style='text-align:center; margin-bottom: 10px;'>
+            <img src='file/{os.path.abspath("assets/seedvr_logo.png")}' style='height:40px;' alt='SeedVR logo'/>
+        </div>
+        <p><b>Demonstração oficial do Gradio</b> para
+        <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>
+        <b>SeedVR2: One-Step Video Restoration via Diffusion Adversarial Post-Training</b></a>.<br>
+        🔥 <b>SeedVR2</b> é um algoritmo de restauração de imagem e vídeo em um passo para conteúdo do mundo real e AIGC.
+        </p>
+    """)
     with gr.Row():
         input_file = gr.File(label="Carregar Imagem ou Vídeo")
         with gr.Column():
     output_video = gr.Video(label="Vídeo de Saída")
     download_link = gr.File(label="Baixar Resultado")
     run_button.click(fn=generation_loop, inputs=[input_file, seed, fps], outputs=[output_image, output_video, download_link])
+    gr.Examples(examples=[["01.mp4", 42, 24], ["02.mp4", 42, 24], ["03.mp4", 42, 24]], inputs=[input_file, seed, fps])
+    gr.HTML("""
+        <hr>
+        <p>Se você achou o SeedVR útil, por favor ⭐ o
+        <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>repositório no GitHub</a>.</p>
+    """)
 demo.queue().launch(share=True)