Aduc-sdr committed on
Commit
3092733
·
verified ·
1 Parent(s): 65a16d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -75
app.py CHANGED
@@ -28,11 +28,8 @@ if not os.path.exists(repo_dir_name):
28
  # --- ETAPA 2: Configuração dos Caminhos ---
29
  # Mudar para o diretório do repositório e adicioná-lo ao path do Python.
30
 
31
- # Mudar para o diretório do repositório. ESSENCIAL para caminhos de arquivos relativos.
32
  os.chdir(repo_dir_name)
33
  print(f"Diretório de trabalho alterado para: {os.getcwd()}")
34
-
35
- # Adicionar o diretório ao sys.path. ESSENCIAL para as importações de módulos.
36
  sys.path.insert(0, os.path.abspath('.'))
37
  print(f"Diretório atual adicionado ao sys.path para importações.")
38
 
@@ -43,7 +40,6 @@ import torch
43
  from pathlib import Path
44
  from urllib.parse import urlparse
45
  from torch.hub import download_url_to_file, get_dir
46
- import shlex
47
 
48
  # Função de download do original
49
  def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
@@ -70,10 +66,10 @@ pretrain_model_url = {
70
  'apex': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl'
71
  }
72
 
 
73
  # Criar diretório de checkpoints e baixar modelos
74
  ckpt_dir = Path('./ckpts')
75
  ckpt_dir.mkdir(exist_ok=True)
76
-
77
  for key, url in pretrain_model_url.items():
78
  filename = os.path.basename(url)
79
  model_dir = './ckpts' if key in ['vae', 'dit'] else '.'
@@ -85,23 +81,27 @@ for key, url in pretrain_model_url.items():
85
  torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/23_1_lq.mp4', '01.mp4')
86
  torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/28_1_lq.mp4', '02.mp4')
87
  torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/2_1_lq.mp4', '03.mp4')
88
- torch.hub.download_url_to_file('https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl', 'apex-0.1-cp310-cp310-linux_x86_64.whl')
89
 
90
- # Instalar dependências de forma robusta
91
  python_executable = sys.executable
92
- subprocess.run([python_executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"], env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}, check=True)
93
 
94
- apex_wheel_path = "apex-0.1-cp310-cp310-linux_x86_64.whl"
95
- if os.path.exists(apex_wheel_path):
96
- print("Instalando o Apex a partir do arquivo wheel...")
97
- subprocess.run([python_executable, "-m", "pip", "install", "--force-reinstall", "--no-cache-dir", apex_wheel_path], check=True)
98
- print("✅ Configuração do Apex concluída.")
99
- else:
100
- print(f"AVISO: O arquivo wheel do Apex '{apex_wheel_path}' não foi encontrado no repositório clonado.")
101
 
102
- # --- ETAPA 4: Execução do Código Principal da Aplicação ---
103
- # Agora que o ambiente está perfeito, importamos e executamos o resto do script.
 
 
 
 
 
 
 
 
 
104
 
 
105
  import mediapy
106
  from einops import rearrange
107
  from omegaconf import OmegaConf
@@ -131,6 +131,8 @@ os.environ["MASTER_ADDR"] = "127.0.0.1"
131
  os.environ["MASTER_PORT"] = "12355"
132
  os.environ["RANK"] = str(0)
133
  os.environ["WORLD_SIZE"] = str(1)
 
 
134
 
135
  if os.path.exists("projects/video_diffusion_sr/color_fix.py"):
136
  from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
@@ -159,122 +161,80 @@ def configure_runner(sp_size):
159
  def generation_step(runner, text_embeds_dict, cond_latents):
160
  def _move_to_cuda(x):
161
  return [i.to(torch.device("cuda")) for i in x]
162
-
163
  noises = [torch.randn_like(latent) for latent in cond_latents]
164
  aug_noises = [torch.randn_like(latent) for latent in cond_latents]
165
  noises, aug_noises, cond_latents = sync_data((noises, aug_noises, cond_latents), 0)
166
  noises, aug_noises, cond_latents = list(map(_move_to_cuda, (noises, aug_noises, cond_latents)))
167
-
168
  def _add_noise(x, aug_noise):
169
  t = torch.tensor([1000.0], device=torch.device("cuda")) * 0.1
170
  shape = torch.tensor(x.shape[1:], device=torch.device("cuda"))[None]
171
  t = runner.timestep_transform(t, shape)
172
  return runner.schedule.forward(x, aug_noise, t)
173
-
174
  conditions = [runner.get_condition(noise, task="sr", latent_blur=_add_noise(latent_blur, aug_noise)) for noise, aug_noise, latent_blur in zip(noises, aug_noises, cond_latents)]
175
-
176
  with torch.no_grad(), torch.autocast("cuda", torch.bfloat16, enabled=True):
177
  video_tensors = runner.inference(noises=noises, conditions=conditions, dit_offload=False, **text_embeds_dict)
178
-
179
  return [rearrange(video, "c t h w -> t c h w") for video in video_tensors]
180
 
181
-
182
  def generation_loop(video_path, seed=666, fps_out=24, batch_size=1, cfg_scale=1.0, cfg_rescale=0.0, sample_steps=1, res_h=1280, res_w=720, sp_size=1):
183
- if video_path is None:
184
- return None, None, None
185
-
186
  runner = configure_runner(1)
187
-
188
  def _extract_text_embeds():
189
  positive_prompts_embeds = []
190
  for _ in original_videos_local:
191
- positive_prompts_embeds.append({
192
- "texts_pos": [torch.load('pos_emb.pt')],
193
- "texts_neg": [torch.load('neg_emb.pt')]
194
- })
195
  gc.collect(); torch.cuda.empty_cache()
196
  return positive_prompts_embeds
197
-
198
- runner.config.diffusion.cfg.scale = cfg_scale
199
- runner.config.diffusion.cfg.rescale = cfg_rescale
200
- runner.config.diffusion.timesteps.sampling.steps = sample_steps
201
  runner.configure_diffusion()
202
  set_seed(int(seed) % (2**32), same_across_ranks=True)
203
  os.makedirs("output", exist_ok=True)
204
-
205
  original_videos = [os.path.basename(video_path)]
206
  original_videos_local = partition_by_size(original_videos, batch_size)
207
  positive_prompts_embeds = _extract_text_embeds()
208
-
209
- video_transform = Compose([
210
- NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False),
211
- Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
212
- DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w"),
213
- ])
214
-
215
  for videos, text_embeds in tqdm(zip(original_videos_local, positive_prompts_embeds)):
216
  media_type, _ = mimetypes.guess_type(video_path)
217
  is_video = media_type and media_type.startswith("video")
218
-
219
  if is_video:
220
- video, _, _ = read_video(video_path, output_format="TCHW")
221
- video = video[:121] / 255.0
222
- output_dir = os.path.join("output", f"{uuid.uuid4()}.mp4")
223
- else: # Assumimos que é uma imagem
224
- video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0)
225
- output_dir = os.path.join("output", f"{uuid.uuid4()}.png")
226
-
227
  cond_latents = [video_transform(video.to("cuda"))]
228
  ori_lengths = [v.size(1) for v in cond_latents]
229
  cond_latents = runner.vae_encode(cond_latents)
230
-
231
  for key in ["texts_pos", "texts_neg"]:
232
- for i, emb in enumerate(text_embeds[key]):
233
- text_embeds[key][i] = emb.to("cuda")
234
-
235
  samples = generation_step(runner, text_embeds, cond_latents=cond_latents)
236
  del cond_latents
237
-
238
  for sample, ori_length in zip(samples, ori_lengths):
239
  sample = sample[:ori_length].to("cpu")
240
  sample = rearrange(sample, "t c h w -> t h w c").clip(-1, 1).mul_(0.5).add_(0.5).mul_(255).round().to(torch.uint8).numpy()
241
-
242
- if is_video:
243
- mediapy.write_video(output_dir, sample, fps=fps_out)
244
- else:
245
- mediapy.write_image(output_dir, sample[0])
246
-
247
  gc.collect(); torch.cuda.empty_cache()
248
  return (None, output_dir, output_dir) if is_video else (output_dir, None, output_dir)
249
 
250
  with gr.Blocks(title="SeedVR2: Restauração de Vídeo em Um Passo") as demo:
251
  gr.HTML(f"""
252
- <div style='text-align:center; margin-bottom: 10px;'>
253
- <img src='file/{os.path.abspath("assets/seedvr_logo.png")}' style='height:40px;' alt='SeedVR logo'/>
254
- </div>
255
- <p><b>Demonstração oficial do Gradio</b> para
256
  <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>
257
- <b>SeedVR2 7b: One-Step Video Restoration via Diffusion Adversarial Post-Training</b></a>.<br>
258
  🔥 <b>SeedVR2</b> é um algoritmo de restauração de imagem e vídeo em um passo para conteúdo do mundo real e AIGC.
259
  </p>
260
  """)
261
-
262
  with gr.Row():
263
  input_file = gr.File(label="Carregar imagem ou vídeo")
264
  with gr.Column():
265
  seed = gr.Number(label="Seed", value=666)
266
  fps = gr.Number(label="FPS de Saída (para vídeo)", value=24)
267
-
268
  run_button = gr.Button("Executar")
269
-
270
  with gr.Row():
271
  output_image = gr.Image(label="Imagem de Saída")
272
  output_video = gr.Video(label="Vídeo de Saída")
273
-
274
  download_link = gr.File(label="Baixar o resultado")
275
-
276
  run_button.click(fn=generation_loop, inputs=[input_file, seed, fps], outputs=[output_image, output_video, download_link])
277
-
278
  gr.Examples(
279
  examples=[
280
  ["01.mp4", 4, 24],
@@ -283,7 +243,6 @@ with gr.Blocks(title="SeedVR2: Restauração de Vídeo em Um Passo") as demo:
283
  ],
284
  inputs=[input_file, seed, fps]
285
  )
286
-
287
  gr.HTML("""
288
  <hr>
289
  <p>Se você achou o SeedVR útil, por favor ⭐ o
@@ -297,5 +256,4 @@ with gr.Blocks(title="SeedVR2: Restauração de Vídeo em Um Passo") as demo:
297
  <h4>Limitações</h4>
298
  <p>Pode falhar em degradações pesadas ou em clipes AIGC com pouco movimento, causando excesso de nitidez ou restauração inadequada.</p>
299
  """)
300
-
301
  demo.queue().launch(share=True)
 
28
  # --- ETAPA 2: Configuração dos Caminhos ---
29
  # Mudar para o diretório do repositório e adicioná-lo ao path do Python.
30
 
 
31
  os.chdir(repo_dir_name)
32
  print(f"Diretório de trabalho alterado para: {os.getcwd()}")
 
 
33
  sys.path.insert(0, os.path.abspath('.'))
34
  print(f"Diretório atual adicionado ao sys.path para importações.")
35
 
 
40
  from pathlib import Path
41
  from urllib.parse import urlparse
42
  from torch.hub import download_url_to_file, get_dir
 
43
 
44
  # Função de download do original
45
  def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
 
66
  'apex': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl'
67
  }
68
 
69
+
70
  # Criar diretório de checkpoints e baixar modelos
71
  ckpt_dir = Path('./ckpts')
72
  ckpt_dir.mkdir(exist_ok=True)
 
73
  for key, url in pretrain_model_url.items():
74
  filename = os.path.basename(url)
75
  model_dir = './ckpts' if key in ['vae', 'dit'] else '.'
 
81
  torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/23_1_lq.mp4', '01.mp4')
82
  torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/28_1_lq.mp4', '02.mp4')
83
  torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/2_1_lq.mp4', '03.mp4')
 
84
 
85
+ # --- REFINAMENTO: Compilar dependências do zero para a GPU L40S (Ada Lovelace) ---
86
  python_executable = sys.executable
 
87
 
88
+ print("Instalando flash-attn compilando do zero...")
89
+ # Força a reinstalação a partir do zero para garantir que seja compilado para a GPU atual
90
+ subprocess.run([python_executable, "-m", "pip", "install", "--force-reinstall", "--no-cache-dir", "flash-attn"], check=True)
 
 
 
 
91
 
92
+ print("Clonando e compilando o Apex do zero...")
93
+ if not os.path.exists("apex"):
94
+ subprocess.run("git clone https://github.com/NVIDIA/apex", shell=True, check=True)
95
+
96
+ # Instala o Apex a partir da fonte clonada, o que força a compilação para a GPU L40S
97
+ # As flags --cpp_ext e --cuda_ext são essenciais para a compilação
98
+ subprocess.run(
99
+ [python_executable, "-m", "pip", "install", "-v", "--disable-pip-version-check", "--no-cache-dir", "--global-option=--cpp_ext", "--global-option=--cuda_ext", "./apex"],
100
+ check=True
101
+ )
102
+ print("✅ Configuração do Apex concluída.")
103
 
104
+ # --- ETAPA 4: Execução do Código Principal da Aplicação ---
105
  import mediapy
106
  from einops import rearrange
107
  from omegaconf import OmegaConf
 
131
  os.environ["MASTER_PORT"] = "12355"
132
  os.environ["RANK"] = str(0)
133
  os.environ["WORLD_SIZE"] = str(1)
134
+ # Adiciona uma variável de ambiente que pode ajudar o PyTorch a debugar erros de CUDA
135
+ os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
136
 
137
  if os.path.exists("projects/video_diffusion_sr/color_fix.py"):
138
  from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
 
161
  def generation_step(runner, text_embeds_dict, cond_latents):
162
  def _move_to_cuda(x):
163
  return [i.to(torch.device("cuda")) for i in x]
 
164
  noises = [torch.randn_like(latent) for latent in cond_latents]
165
  aug_noises = [torch.randn_like(latent) for latent in cond_latents]
166
  noises, aug_noises, cond_latents = sync_data((noises, aug_noises, cond_latents), 0)
167
  noises, aug_noises, cond_latents = list(map(_move_to_cuda, (noises, aug_noises, cond_latents)))
 
168
  def _add_noise(x, aug_noise):
169
  t = torch.tensor([1000.0], device=torch.device("cuda")) * 0.1
170
  shape = torch.tensor(x.shape[1:], device=torch.device("cuda"))[None]
171
  t = runner.timestep_transform(t, shape)
172
  return runner.schedule.forward(x, aug_noise, t)
 
173
  conditions = [runner.get_condition(noise, task="sr", latent_blur=_add_noise(latent_blur, aug_noise)) for noise, aug_noise, latent_blur in zip(noises, aug_noises, cond_latents)]
 
174
  with torch.no_grad(), torch.autocast("cuda", torch.bfloat16, enabled=True):
175
  video_tensors = runner.inference(noises=noises, conditions=conditions, dit_offload=False, **text_embeds_dict)
 
176
  return [rearrange(video, "c t h w -> t c h w") for video in video_tensors]
177
 
178
+ @spaces.GPU
179
  def generation_loop(video_path, seed=666, fps_out=24, batch_size=1, cfg_scale=1.0, cfg_rescale=0.0, sample_steps=1, res_h=1280, res_w=720, sp_size=1):
180
+ if video_path is None: return None, None, None
 
 
181
  runner = configure_runner(1)
 
182
  def _extract_text_embeds():
183
  positive_prompts_embeds = []
184
  for _ in original_videos_local:
185
+ positive_prompts_embeds.append({"texts_pos": [torch.load('pos_emb.pt')], "texts_neg": [torch.load('neg_emb.pt')]})
 
 
 
186
  gc.collect(); torch.cuda.empty_cache()
187
  return positive_prompts_embeds
188
+ runner.config.diffusion.cfg.scale, runner.config.diffusion.cfg.rescale, runner.config.diffusion.timesteps.sampling.steps = cfg_scale, cfg_rescale, sample_steps
 
 
 
189
  runner.configure_diffusion()
190
  set_seed(int(seed) % (2**32), same_across_ranks=True)
191
  os.makedirs("output", exist_ok=True)
 
192
  original_videos = [os.path.basename(video_path)]
193
  original_videos_local = partition_by_size(original_videos, batch_size)
194
  positive_prompts_embeds = _extract_text_embeds()
195
+ video_transform = Compose([NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False), Lambda(lambda x: torch.clamp(x, 0.0, 1.0)), DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w")])
 
 
 
 
 
 
196
  for videos, text_embeds in tqdm(zip(original_videos_local, positive_prompts_embeds)):
197
  media_type, _ = mimetypes.guess_type(video_path)
198
  is_video = media_type and media_type.startswith("video")
 
199
  if is_video:
200
+ video, _, _ = read_video(video_path, output_format="TCHW"); video = video[:121] / 255.0; output_dir = os.path.join("output", f"{uuid.uuid4()}.mp4")
201
+ else:
202
+ video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0); output_dir = os.path.join("output", f"{uuid.uuid4()}.png")
 
 
 
 
203
  cond_latents = [video_transform(video.to("cuda"))]
204
  ori_lengths = [v.size(1) for v in cond_latents]
205
  cond_latents = runner.vae_encode(cond_latents)
 
206
  for key in ["texts_pos", "texts_neg"]:
207
+ for i, emb in enumerate(text_embeds[key]): text_embeds[key][i] = emb.to("cuda")
 
 
208
  samples = generation_step(runner, text_embeds, cond_latents=cond_latents)
209
  del cond_latents
 
210
  for sample, ori_length in zip(samples, ori_lengths):
211
  sample = sample[:ori_length].to("cpu")
212
  sample = rearrange(sample, "t c h w -> t h w c").clip(-1, 1).mul_(0.5).add_(0.5).mul_(255).round().to(torch.uint8).numpy()
213
+ if is_video: mediapy.write_video(output_dir, sample, fps=fps_out)
214
+ else: mediapy.write_image(output_dir, sample[0])
 
 
 
 
215
  gc.collect(); torch.cuda.empty_cache()
216
  return (None, output_dir, output_dir) if is_video else (output_dir, None, output_dir)
217
 
218
  with gr.Blocks(title="SeedVR2: Restauração de Vídeo em Um Passo") as demo:
219
  gr.HTML(f"""
220
+
221
+ <p>
 
 
222
  <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>
223
+ <b>SeedVR2 7B: One-Step Video Restoration via Diffusion Adversarial Post-Training</b></a>.<br>
224
  🔥 <b>SeedVR2</b> é um algoritmo de restauração de imagem e vídeo em um passo para conteúdo do mundo real e AIGC.
225
  </p>
226
  """)
 
227
  with gr.Row():
228
  input_file = gr.File(label="Carregar imagem ou vídeo")
229
  with gr.Column():
230
  seed = gr.Number(label="Seed", value=666)
231
  fps = gr.Number(label="FPS de Saída (para vídeo)", value=24)
 
232
  run_button = gr.Button("Executar")
 
233
  with gr.Row():
234
  output_image = gr.Image(label="Imagem de Saída")
235
  output_video = gr.Video(label="Vídeo de Saída")
 
236
  download_link = gr.File(label="Baixar o resultado")
 
237
  run_button.click(fn=generation_loop, inputs=[input_file, seed, fps], outputs=[output_image, output_video, download_link])
 
238
  gr.Examples(
239
  examples=[
240
  ["01.mp4", 4, 24],
 
243
  ],
244
  inputs=[input_file, seed, fps]
245
  )
 
246
  gr.HTML("""
247
  <hr>
248
  <p>Se você achou o SeedVR útil, por favor ⭐ o
 
256
  <h4>Limitações</h4>
257
  <p>Pode falhar em degradações pesadas ou em clipes AIGC com pouco movimento, causando excesso de nitidez ou restauração inadequada.</p>
258
  """)
 
259
  demo.queue().launch(share=True)