# Spaces: Paused  (appears to be page-status residue from the hosting page's file viewer; not part of the program)
# // Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# //
# // Licensed under the Apache License, Version 2.0 (the "License");
# // you may not use this file except in compliance with the License.
# // You may obtain a copy of the License at
# //
# // http://www.apache.org/licenses/LICENSE-2.0
# //
# // Unless required by applicable law or agreed to in writing, software
# // distributed under the License is distributed on an "AS IS" BASIS,
# // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# // See the License for the specific language governing permissions and
# // limitations under the License.
import spaces
import subprocess
import os
import sys

# --- STEP 1: Environment preparation ---
# Clone the Space repository so that the source packages imported further
# down (data, common, projects, ...) exist on disk.
repo_dir_name = "SeedVR2-3B"
if not os.path.exists(repo_dir_name):
    print(f"Clonando o repositório {repo_dir_name} para obter todo o código-fonte...")
    # --depth 1 keeps the clone fast; the history is not needed.
    # The command is given as an argument list (no shell=True) so the URL is
    # passed to git verbatim instead of being re-parsed by a shell.
    subprocess.run(
        [
            "git",
            "clone",
            "--depth",
            "1",
            f"https://huggingface.co/spaces/ByteDance-Seed/{repo_dir_name}",
        ],
        check=True,
    )

# --- STEP 2: Path configuration ---
# Make the repository the working directory and put it first on sys.path so
# its packages can be imported and its relative paths resolve.
os.chdir(repo_dir_name)
print(f"Diretório de trabalho alterado para: {os.getcwd()}")
sys.path.insert(0, os.path.abspath('.'))
print("Diretório atual adicionado ao sys.path para importações.")
# --- STEP 3: Dependency installation and model download ---
# The working directory is now the cloned repository, so we can proceed.
import torch
from pathlib import Path
from urllib.parse import urlparse
from torch.hub import download_url_to_file, get_dir


# Download helper carried over from the original application.
def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
    """Return the local path for ``url``, downloading the file only if missing.

    Args:
        url: Remote location of the file.
        model_dir: Directory to store the file in. When ``None``, the
            ``checkpoints`` sub-directory of ``torch.hub.get_dir()`` is used.
            The directory is created if necessary.
        progress: Forwarded to ``download_url_to_file``.
        file_name: Local file name to use instead of the last component of
            the URL path.

    Returns:
        Absolute path of the (possibly pre-existing) local file.

    Raises:
        ValueError: If no file name can be determined, i.e. the URL path has
            an empty last component and ``file_name`` is missing or empty.
    """
    if model_dir is None:
        model_dir = os.path.join(get_dir(), 'checkpoints')
    os.makedirs(model_dir, exist_ok=True)

    if file_name is not None:
        filename = file_name
    else:
        # urlparse drops the query string / fragment from the name.
        filename = os.path.basename(urlparse(url).path)

    # Without this guard an empty name makes the target path equal to
    # model_dir itself, which exists, so a directory would be returned as if
    # it were the downloaded file.
    if not filename:
        raise ValueError(
            f"Cannot derive a file name from URL {url!r}; pass file_name explicitly."
        )

    cached_file = os.path.abspath(os.path.join(model_dir, filename))
    if not os.path.exists(cached_file):
        print(f'Baixando: "{url}" para {cached_file}\n')
        download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
    return cached_file
# Model file URLs.
pretrain_model_url = {
    'vae': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth',
    'dit': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth',
    'pos_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/pos_emb.pt',
    'neg_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/neg_emb.pt',
}

# Create the checkpoint directory and download the model files.
ckpt_dir = Path('./ckpts')
ckpt_dir.mkdir(exist_ok=True)
for key, url in pretrain_model_url.items():
    filename = os.path.basename(url)
    # VAE and DiT weights go to ./ckpts; the embedding files stay in the
    # working directory, where the rest of the script loads them from.
    model_dir = './ckpts' if key in ['vae', 'dit'] else '.'
    # load_file_from_url skips the download when the target already exists,
    # so no separate existence check is needed here.
    load_file_from_url(url=url, model_dir=model_dir, progress=True, file_name=filename)

# Example videos for the demo. Like the model files, they are fetched only
# when missing instead of being downloaded again on every start.
example_videos = {
    '01.mp4': 'https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/23_1_lq.mp4',
    '02.mp4': 'https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/28_1_lq.mp4',
    '03.mp4': 'https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/2_1_lq.mp4',
}
for local_name, video_url in example_videos.items():
    if not os.path.exists(local_name):
        torch.hub.download_url_to_file(video_url, local_name)
# --- REFINEMENT: build the compiled dependencies from scratch for the L40S GPU (Ada Lovelace) ---
python_executable = sys.executable

print("Instalando flash-attn compilando do zero...")
# Force a from-scratch reinstall so the package is built for the current GPU.
# --no-build-isolation follows the flash-attention install instructions: the
# build needs the torch that is already installed in this environment.
# NOTE(review): pip's --force-reinstall also reinstalls dependencies; confirm
# this does not replace the torch build that is already imported above.
subprocess.run(
    [
        python_executable, "-m", "pip", "install",
        "--force-reinstall", "--no-cache-dir", "--no-build-isolation",
        "flash-attn",
    ],
    check=True,
)

print("Clonando e compilando o Apex do zero...")
if not os.path.exists("apex"):
    # Argument list instead of a shell string, consistent with the pip calls.
    subprocess.run(["git", "clone", "https://github.com/NVIDIA/apex"], check=True)

# Install Apex from the cloned source, which forces compilation on this machine.
# The --cpp_ext and --cuda_ext flags are required to build the extensions.
# NOTE(review): recent pip releases deprecate --global-option; the Apex README
# documents a --config-settings form for pip >= 23.1 — confirm against the
# pip version available in the deployment image.
subprocess.run(
    [
        python_executable, "-m", "pip", "install", "-v",
        "--disable-pip-version-check", "--no-cache-dir", "--no-build-isolation",
        "--global-option=--cpp_ext", "--global-option=--cuda_ext",
        "./apex",
    ],
    check=True,
)
print("✅ Configuração do Apex concluída.")
# --- STEP 4: Main application code ---
import mediapy
from einops import rearrange
from omegaconf import OmegaConf
import datetime
from tqdm import tqdm
import gc
from PIL import Image
import gradio as gr
import uuid
import mimetypes
import torchvision.transforms as T
from torchvision.transforms import Compose, Lambda, Normalize
from torchvision.io.video import read_video
from data.image.transforms.divisible_crop import DivisibleCrop
from data.image.transforms.na_resize import NaResize
from data.video.transforms.rearrange import Rearrange
from common.config import load_config
from common.distributed import init_torch
from common.distributed.advanced import init_sequence_parallel
from common.seed import set_seed
from common.partition import partition_by_size
from projects.video_diffusion_sr.infer import VideoDiffusionInfer
from common.distributed.ops import sync_data

# Environment for a single local process (rank 0 of a world of size 1).
# Presumably consumed by the process-group setup inside init_torch — confirm.
# CUDA_LAUNCH_BLOCKING is set because it may help PyTorch debug CUDA errors.
os.environ.update(
    {
        "MASTER_ADDR": "127.0.0.1",
        "MASTER_PORT": "12355",
        "RANK": str(0),
        "WORLD_SIZE": str(1),
        "CUDA_LAUNCH_BLOCKING": "1",
    }
)

# Color correction is optional: it is enabled only when the module file is
# present in the cloned repository.
use_colorfix = os.path.exists("projects/video_diffusion_sr/color_fix.py")
if use_colorfix:
    from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
else:
    print('Atenção!!!!!! A correção de cor não está disponível!')
def configure_sequence_parallel(sp_size):
    """Initialise sequence parallelism when ``sp_size`` is greater than 1.

    For any other value nothing happens and ``None`` is returned.
    """
    if not sp_size > 1:
        return
    init_sequence_parallel(sp_size)
def configure_runner(sp_size):
    """Build a ``VideoDiffusionInfer`` runner with the DiT and VAE configured.

    Args:
        sp_size: Forwarded to ``configure_sequence_parallel``.

    Returns:
        The configured runner, with its config made writable.
    """
    runner = VideoDiffusionInfer(load_config('configs_3b/main.yaml'))
    # Callers modify the config afterwards, so lift the read-only flag.
    OmegaConf.set_readonly(runner.config, False)

    init_timeout = datetime.timedelta(seconds=3600)
    init_torch(cudnn_benchmark=False, timeout=init_timeout)
    configure_sequence_parallel(sp_size)

    runner.configure_dit_model(device="cuda", checkpoint='ckpts/seedvr2_ema_3b.pth')
    runner.configure_vae_model()

    # Apply the configured memory limit only if this VAE supports it.
    vae = runner.vae
    if hasattr(vae, "set_memory_limit"):
        vae.set_memory_limit(**runner.config.vae.memory_limit)
    return runner
def generation_step(runner, text_embeds_dict, cond_latents):
    """Run one inference pass for a list of condition latents.

    Args:
        runner: Inference runner providing ``timestep_transform``,
            ``schedule``, ``get_condition`` and ``inference``.
        text_embeds_dict: Keyword arguments forwarded to ``runner.inference``.
        cond_latents: List of latent tensors to condition on.

    Returns:
        List of outputs, each rearranged from ``c t h w`` to ``t c h w``.
    """
    cuda = torch.device("cuda")

    def _to_cuda(tensors):
        return [tensor.to(cuda) for tensor in tensors]

    def _noised_latent(latent, aug_noise):
        # Timestep 1000.0 * 0.1, transformed for this latent's shape
        # (all dimensions after the first).
        timestep = torch.tensor([1000.0], device=cuda) * 0.1
        latent_shape = torch.tensor(latent.shape[1:], device=cuda)[None]
        timestep = runner.timestep_transform(timestep, latent_shape)
        return runner.schedule.forward(latent, aug_noise, timestep)

    # Draw all sampling noises first, then all augmentation noises, so the
    # random stream is consumed in a fixed order.
    noises = [torch.randn_like(latent) for latent in cond_latents]
    aug_noises = [torch.randn_like(latent) for latent in cond_latents]

    noises, aug_noises, cond_latents = sync_data((noises, aug_noises, cond_latents), 0)
    noises = _to_cuda(noises)
    aug_noises = _to_cuda(aug_noises)
    cond_latents = _to_cuda(cond_latents)

    conditions = []
    for noise, aug_noise, latent in zip(noises, aug_noises, cond_latents):
        blurred = _noised_latent(latent, aug_noise)
        conditions.append(runner.get_condition(noise, task="sr", latent_blur=blurred))

    with torch.no_grad(), torch.autocast("cuda", torch.bfloat16, enabled=True):
        video_tensors = runner.inference(
            noises=noises,
            conditions=conditions,
            dit_offload=False,
            **text_embeds_dict,
        )

    return [rearrange(video, "c t h w -> t c h w") for video in video_tensors]
def generation_loop(video_path, seed=666, fps_out=24, batch_size=1, cfg_scale=1.0, cfg_rescale=0.0, sample_steps=1, res_h=1280, res_w=720, sp_size=1):
    """Restore one uploaded image or video and write the result under ``output/``.

    Args:
        video_path: Path of the input file; ``None`` short-circuits.
        seed: Random seed (reduced modulo 2**32 before use).
        fps_out: Frame rate used when writing a video result.
        batch_size: Forwarded to ``partition_by_size``.
        cfg_scale, cfg_rescale, sample_steps: Written into the runner's
            diffusion config before ``configure_diffusion``.
        res_h, res_w: Target size; the square root of their product is passed
            to ``NaResize`` as the resolution.
        sp_size: Accepted but not used here; the runner is always configured
            with a value of 1.

    Returns:
        ``(image_path, video_path, download_path)``: for a video input the
        first item is ``None``; for any other input the second is ``None``.
        ``(None, None, None)`` when ``video_path`` is ``None``.
    """
    if video_path is None:
        return None, None, None

    # NOTE(review): the runner (and its models) is rebuilt on every call.
    runner = configure_runner(1)

    runner.config.diffusion.cfg.scale = cfg_scale
    runner.config.diffusion.cfg.rescale = cfg_rescale
    runner.config.diffusion.timesteps.sampling.steps = sample_steps
    runner.configure_diffusion()

    set_seed(int(seed) % (2**32), same_across_ranks=True)
    os.makedirs("output", exist_ok=True)

    original_videos_local = partition_by_size([os.path.basename(video_path)], batch_size)

    # One pair of precomputed prompt embeddings per partition.
    positive_prompts_embeds = [
        {"texts_pos": [torch.load('pos_emb.pt')], "texts_neg": [torch.load('neg_emb.pt')]}
        for _ in original_videos_local
    ]
    gc.collect()
    torch.cuda.empty_cache()

    video_transform = Compose(
        [
            NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False),
            Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
            DivisibleCrop((16, 16)),
            Normalize(0.5, 0.5),
            Rearrange("t c h w -> c t h w"),
        ]
    )

    for _batch, text_embeds in tqdm(zip(original_videos_local, positive_prompts_embeds)):
        media_type, _ = mimetypes.guess_type(video_path)
        is_video = media_type and media_type.startswith("video")

        if is_video:
            frames, _, _ = read_video(video_path, output_format="TCHW")
            # Keep at most the first 121 frames, scaled to [0, 1].
            video = frames[:121] / 255.0
            extension = "mp4"
        else:
            # Anything that is not a video is read as a single RGB image.
            video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0)
            extension = "png"
        output_dir = os.path.join("output", f"{uuid.uuid4()}.{extension}")

        cond_latents = [video_transform(video.to("cuda"))]
        ori_lengths = [latent.size(1) for latent in cond_latents]
        cond_latents = runner.vae_encode(cond_latents)

        # Move the embeddings to the GPU, updating the lists in place.
        for key in ("texts_pos", "texts_neg"):
            text_embeds[key][:] = [emb.to("cuda") for emb in text_embeds[key]]

        samples = generation_step(runner, text_embeds, cond_latents=cond_latents)
        del cond_latents

        for sample, ori_length in zip(samples, ori_lengths):
            trimmed = sample[:ori_length].to("cpu")
            channels_last = rearrange(trimmed, "t c h w -> t h w c")
            # Map [-1, 1] to [0, 255] and convert to uint8 for writing.
            pixels = (
                channels_last.clip(-1, 1)
                .mul_(0.5)
                .add_(0.5)
                .mul_(255)
                .round()
                .to(torch.uint8)
                .numpy()
            )
            if is_video:
                mediapy.write_video(output_dir, pixels, fps=fps_out)
            else:
                mediapy.write_image(output_dir, pixels[0])

        gc.collect()
        torch.cuda.empty_cache()

    if is_video:
        return None, output_dir, output_dir
    return output_dir, None, output_dir
# Static HTML shown above and below the controls.
_HEADER_HTML = """
<p><b>Demonstração oficial do Gradio</b> para
<a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>
<b>SeedVR2: One-Step Video Restoration via Diffusion Adversarial Post-Training</b></a>.<br>
🔥 <b>SeedVR2</b> é um algoritmo de restauração de imagem e vídeo em um passo para conteúdo do mundo real e AIGC.
</p>
"""

_FOOTER_HTML = """
<hr>
<p>Se você achou o SeedVR útil, por favor ⭐ o
<a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>repositório no GitHub</a>:</p>
<a href="https://github.com/ByteDance-Seed/SeedVR" target="_blank">
<img src="https://img.shields.io/github/stars/ByteDance-Seed/SeedVR?style=social" alt="GitHub Stars">
</a>
<h4>Aviso</h4>
<p>Esta demonstração suporta até <b>720p e 121 frames para vídeos ou imagens 2k</b>.
Para outros casos de uso, verifique o <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>repositório no GitHub</a>.</p>
<h4>Limitações</h4>
<p>Pode falhar em degradações pesadas ou em clipes AIGC com pouco movimento, causando excesso de nitidez ou restauração inadequada.</p>
"""

# Gradio interface: one file input plus seed / FPS controls, wired to
# generation_loop, with image, video and download outputs.
# NOTE(review): the nesting of rows/columns below is reconstructed; the
# original indentation was not available — confirm the intended layout.
with gr.Blocks(title="SeedVR2: Restauração de Vídeo em Um Passo") as demo:
    gr.HTML(_HEADER_HTML)

    with gr.Row():
        input_file = gr.File(label="Carregar imagem ou vídeo")
        with gr.Column():
            seed = gr.Number(label="Seed", value=666)
            fps = gr.Number(label="FPS de Saída (para vídeo)", value=24)

    run_button = gr.Button("Executar")

    with gr.Row():
        output_image = gr.Image(label="Imagem de Saída")
        output_video = gr.Video(label="Vídeo de Saída")
    download_link = gr.File(label="Baixar o resultado")

    controls = [input_file, seed, fps]
    run_button.click(
        fn=generation_loop,
        inputs=controls,
        outputs=[output_image, output_video, download_link],
    )

    # The three bundled example clips, each with seed 4 and 24 FPS.
    gr.Examples(
        examples=[[f"{index:02d}.mp4", 4, 24] for index in (1, 2, 3)],
        inputs=[input_file, seed, fps],
    )

    gr.HTML(_FOOTER_HTML)

queued_demo = demo.queue()
queued_demo.launch(share=True)