SeedVR2-7B

Runtime error

App Files Files Community

Aduc-sdr committed on Sep 5, 2025

Commit

4891df0

verified ·

1 Parent(s): 0ea462c

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -30

app.py CHANGED Viewed

@@ -32,12 +32,10 @@ print(f"Diretório atual adicionado ao sys.path.")
 # --- ETAPA 3: Instalar Dependências Corretamente ---
 python_executable = sys.executable
-# CORREÇÃO: Forçar uma versão do NumPy < 2.0 para evitar conflitos de compatibilidade.
 print("Instalando NumPy compatível...")
 subprocess.run([python_executable, "-m", "pip", "install", "numpy<2.0"], check=True)
-# Filtrar requirements.txt para evitar conflitos com torch/torchvision pré-instalados
-print("Filtrando requirements.txt...")
 with open("requirements.txt", "r") as f_in, open("filtered_requirements.txt", "w") as f_out:
     for line in f_in:
         if not line.strip().startswith(('torch', 'torchvision')):
@@ -52,6 +50,7 @@ subprocess.run([python_executable, "-m", "pip", "install", "flash-attn==2.5.9.po
 from pathlib import Path
 from urllib.parse import urlparse
 from torch.hub import download_url_to_file, get_dir
 def load_file_from_url(url, model_dir='.', progress=True, file_name=None):
     os.makedirs(model_dir, exist_ok=True)
@@ -72,14 +71,11 @@ print("✅ Configuração do Apex concluída.")
 # --- ETAPA 4: Baixar os Modelos Pré-treinados ---
 print("Baixando modelos pré-treinados...")
-import torch
 pretrain_model_url = {
-	'vae': 'https://huggingface.co/ByteDance-Seed/SeedVR-7B/resolve/main/ema_vae.pth',
-	'dit': 'https://huggingface.co/ByteDance-Seed/SeedVR-7B/resolve/main/seedvr_ema_7b.pth',
     'pos_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/pos_emb.pt',
     'neg_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/neg_emb.pt',
-    #'apex': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp39-cp39-linux_x86_64.whl'
 }
 Path('./ckpts').mkdir(exist_ok=True)
@@ -87,8 +83,12 @@ for key, url in pretrain_model_url.items():
     model_dir = './ckpts' if key in ['vae', 'dit'] else '.'
     load_file_from_url(url=url, model_dir=model_dir)
-# --- ETAPA 5: Executar a Aplicação Principal ---
 import mediapy
 from einops import rearrange
 from omegaconf import OmegaConf
@@ -122,16 +122,24 @@ if use_colorfix:
     from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
 def configure_runner():
-    config = load_config('configs_7b/main.yaml')
     runner = VideoDiffusionInfer(config)
     OmegaConf.set_readonly(runner.config, False)
     init_torch(cudnn_benchmark=False, timeout=datetime.timedelta(seconds=3600))
-    runner.configure_dit_model(device="cuda", checkpoint='ckpts/seedvr2_ema_7b.pth')
     runner.configure_vae_model()
     if hasattr(runner.vae, "set_memory_limit"):
         runner.vae.set_memory_limit(**runner.config.vae.memory_limit)
     return runner
 def generation_step(runner, text_embeds_dict, cond_latents):
     def _move_to_cuda(x): return [i.to("cuda") for i in x]
     noises, aug_noises = [torch.randn_like(l) for l in cond_latents], [torch.randn_like(l) for l in cond_latents]
@@ -147,11 +155,27 @@ def generation_step(runner, text_embeds_dict, cond_latents):
         video_tensors = runner.inference(noises=noises, conditions=conditions, **text_embeds_dict)
     return [rearrange(v, "c t h w -> t c h w") for v in video_tensors]
 @spaces.GPU
 def generation_loop(video_path, seed=666, fps_out=24):
     if video_path is None: return None, None, None
-    runner = configure_runner()
-    # Adicionado `weights_only=True` para segurança e para suprimir o aviso
     text_embeds = {
         "texts_pos": [torch.load('pos_emb.pt', weights_only=True).to("cuda")],
         "texts_neg": [torch.load('neg_emb.pt', weights_only=True).to("cuda")]
@@ -160,31 +184,36 @@ def generation_loop(video_path, seed=666, fps_out=24):
     set_seed(int(seed))
     os.makedirs("output", exist_ok=True)
-    # CORREÇÃO: Fornecer os argumentos que faltam para NaResize.
     res_h, res_w = 1280, 720
     transform = Compose([
         NaResize(resolution=(res_h * res_w)**0.5, mode="area", downsample_only=False),
         Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
-        DivisibleCrop((16, 16)),
-        Normalize(0.5, 0.5),
-        Rearrange("t c h w -> c t h w")
     ])
     media_type, _ = mimetypes.guess_type(video_path)
     is_video = media_type and media_type.startswith("video")
     if is_video:
-        video, _, _ = read_video(video_path, output_format="TCHW")
-        video = video[:121] / 255.0
         output_path = os.path.join("output", f"{uuid.uuid4()}.mp4")
     else:
         video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0)
         output_path = os.path.join("output", f"{uuid.uuid4()}.png")
-    cond_latents = [transform(video.to("cuda"))]
-    ori_length = cond_latents[0].size(2)
     cond_latents = runner.vae_encode(cond_latents)
     samples = generation_step(runner, text_embeds, cond_latents)
     sample = samples[0][:ori_length].cpu()
     sample = rearrange(sample, "t c h w -> t h w c").clip(-1, 1).add(1).mul(127.5).byte().numpy()
@@ -196,14 +225,7 @@ def generation_loop(video_path, seed=666, fps_out=24):
         return output_path, None, output_path
 with gr.Blocks(title="SeedVR") as demo:
-    gr.HTML(f"""
-        <p><b>Demonstração oficial do Gradio</b> para
-        <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>
-        <b>SeedVR2: One-Step Video Restoration via Diffusion Adversarial Post-Training</b></a>.<br>
-        🔥 <b>SeedVR2</b> é um algoritmo de restauração de imagem e vídeo em um passo para conteúdo do mundo real e AIGC.
-        </p>
-    """)
     with gr.Row():
         input_file = gr.File(label="Carregar Imagem ou Vídeo")
         with gr.Column():
@@ -214,6 +236,7 @@ with gr.Blocks(title="SeedVR") as demo:
     output_video = gr.Video(label="Vídeo de Saída")
     download_link = gr.File(label="Baixar Resultado")
     run_button.click(fn=generation_loop, inputs=[input_file, seed, fps], outputs=[output_image, output_video, download_link])
 demo.queue().launch(share=True)

 # --- ETAPA 3: Instalar Dependências Corretamente ---
 python_executable = sys.executable
 print("Instalando NumPy compatível...")
 subprocess.run([python_executable, "-m", "pip", "install", "numpy<2.0"], check=True)
+print("Filtrando requirements.txt para evitar conflitos de versão...")
 with open("requirements.txt", "r") as f_in, open("filtered_requirements.txt", "w") as f_out:
     for line in f_in:
         if not line.strip().startswith(('torch', 'torchvision')):
 from pathlib import Path
 from urllib.parse import urlparse
 from torch.hub import download_url_to_file, get_dir
+import torch
 def load_file_from_url(url, model_dir='.', progress=True, file_name=None):
     os.makedirs(model_dir, exist_ok=True)
 # --- ETAPA 4: Baixar os Modelos Pré-treinados ---
 print("Baixando modelos pré-treinados...")
 pretrain_model_url = {
+    'vae': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth',
+    'dit': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth',
     'pos_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/pos_emb.pt',
     'neg_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/neg_emb.pt',
 }
 Path('./ckpts').mkdir(exist_ok=True)
     model_dir = './ckpts' if key in ['vae', 'dit'] else '.'
     load_file_from_url(url=url, model_dir=model_dir)
+torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/23_1_lq.mp4', '01.mp4')
+torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/28_1_lq.mp4', '02.mp4')
+torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/2_1_lq.mp4', '03.mp4')
+# --- ETAPA 5: Inicialização Global do Modelo (FEITA APENAS UMA VEZ) ---
+print("Inicializando o modelo e o ambiente distribuído (uma única vez)...")
 import mediapy
 from einops import rearrange
 from omegaconf import OmegaConf
     from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
 def configure_runner():
+    # Build and return a VideoDiffusionInfer runner: load configs_3b/main.yaml,
+    # configure the DiT model on CUDA from ckpts/seedvr2_ema_3b.pth, then
+    # configure the VAE.
+    config = load_config('configs_3b/main.yaml')
     runner = VideoDiffusionInfer(config)
+    # Clear the read-only flag on the runner's config.
     OmegaConf.set_readonly(runner.config, False)
+    # The critical initialization call is made here.
+    # NOTE(review): init_torch is defined outside this view; it is called with
+    # a 3600-second timeout and presumably sets up the torch/distributed
+    # environment — confirm against its definition.
     init_torch(cudnn_benchmark=False, timeout=datetime.timedelta(seconds=3600))
+    runner.configure_dit_model(device="cuda", checkpoint='ckpts/seedvr2_ema_3b.pth')
     runner.configure_vae_model()
+    # set_memory_limit is optional on the VAE object; when it exists, call it
+    # with the keyword arguments stored under config.vae.memory_limit.
     if hasattr(runner.vae, "set_memory_limit"):
         runner.vae.set_memory_limit(**runner.config.vae.memory_limit)
     return runner
+# Criamos o runner globalmente, UMA ÚNICA VEZ
+GLOBAL_RUNNER = configure_runner()
+print("✅ Setup completo. Aplicação pronta para receber requisições.")
+# --- ETAPA 6: Funções de Inferência e UI do Gradio ---
 def generation_step(runner, text_embeds_dict, cond_latents):
     def _move_to_cuda(x): return [i.to("cuda") for i in x]
     noises, aug_noises = [torch.randn_like(l) for l in cond_latents], [torch.randn_like(l) for l in cond_latents]
         video_tensors = runner.inference(noises=noises, conditions=conditions, **text_embeds_dict)
     return [rearrange(v, "c t h w -> t c h w") for v in video_tensors]
+def cut_videos(videos, sp_size=1):
+    # Cap the tensor at 121 entries along dim 1, then pad along dim 1 by
+    # repeating the last entry until (length - 1) is a multiple of 4 * sp_size.
+    # Returns the (possibly truncated and/or padded) tensor.
+    # NOTE(review): the caller passes the output of a transform ending in
+    # Rearrange("t c h w -> c t h w"), so dim 1 is presumably the frame axis
+    # and the input is 4-D — confirm.
+    t = videos.size(1)
+    if t > 121:
+        # Keep only the first 121 entries along dim 1 (hard-coded cap).
+        videos = videos[:, :121]
+        t = 121
+    if (t - 1) % (4 * sp_size) == 0:
+        # Length already satisfies the constraint: no padding needed.
+        return videos
+    else:
+        # Number of extra entries required to reach the next valid length.
+        padding_needed = 4 * sp_size - ((t - 1) % (4 * sp_size))
+        # Take the last entry along dim 1 and restore that dim with size 1
+        # so it can be tiled and concatenated.
+        last_frame = videos[:, -1].unsqueeze(1)
+        # Four repeat factors are given, which matches a 4-D tensor.
+        padding = last_frame.repeat(1, padding_needed, 1, 1)
+        videos = torch.cat([videos, padding], dim=1)
+        # Sanity check on the padded length; note that assert statements are
+        # skipped when Python runs with -O.
+        assert (videos.size(1) - 1) % (4 * sp_size) == 0
+        return videos
 @spaces.GPU
 def generation_loop(video_path, seed=666, fps_out=24):
     if video_path is None: return None, None, None
+    # CORREÇÃO: Usamos o runner global em vez de criar um novo
+    runner = GLOBAL_RUNNER
     text_embeds = {
         "texts_pos": [torch.load('pos_emb.pt', weights_only=True).to("cuda")],
         "texts_neg": [torch.load('neg_emb.pt', weights_only=True).to("cuda")]
     set_seed(int(seed))
     os.makedirs("output", exist_ok=True)
     res_h, res_w = 1280, 720
     transform = Compose([
         NaResize(resolution=(res_h * res_w)**0.5, mode="area", downsample_only=False),
         Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
+        DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w")
     ])
     media_type, _ = mimetypes.guess_type(video_path)
     is_video = media_type and media_type.startswith("video")
     if is_video:
+        video, _, _ = read_video(video_path, output_format="TCHW", pts_unit="sec")
+        video = video / 255.0
         output_path = os.path.join("output", f"{uuid.uuid4()}.mp4")
     else:
         video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0)
         output_path = os.path.join("output", f"{uuid.uuid4()}.png")
+    transformed_video = transform(video.to("cuda"))
+    ori_length = transformed_video.size(1)
+    if is_video:
+        padded_video = cut_videos(transformed_video)
+        cond_latents = [padded_video]
+    else:
+        cond_latents = [transformed_video]
     cond_latents = runner.vae_encode(cond_latents)
     samples = generation_step(runner, text_embeds, cond_latents)
     sample = samples[0][:ori_length].cpu()
     sample = rearrange(sample, "t c h w -> t h w c").clip(-1, 1).add(1).mul(127.5).byte().numpy()
         return output_path, None, output_path
 with gr.Blocks(title="SeedVR") as demo:
+    gr.HTML(f"""<div style='text-align:center; margin-bottom: 10px;'><img src='file/{os.path.abspath("assets/seedvr_logo.png")}' style='height:40px;'/></div>...""")
     with gr.Row():
         input_file = gr.File(label="Carregar Imagem ou Vídeo")
         with gr.Column():
     output_video = gr.Video(label="Vídeo de Saída")
     download_link = gr.File(label="Baixar Resultado")
     run_button.click(fn=generation_loop, inputs=[input_file, seed, fps], outputs=[output_image, output_video, download_link])
+    gr.Examples(examples=[["01.mp4", 42, 24], ["02.mp4", 42, 24], ["03.mp4", 42, 24]], inputs=[input_file, seed, fps])
+    gr.HTML("""<hr>...""")
 demo.queue().launch(share=True)