SeedVR2-3B-Image-Upscale

Running on Zero

App Files Files Community

bbqhan committed 3 days ago

Commit

c1a0d47

verified ·

1 Parent(s): f34d937

testing optimization for h200

Browse files

Changes Made for H200:

Enabled TF32: Crucial for H200 matrix multiplication speed.

Max-Autotune Compilation: Enabled torch.compile with max-autotune on the DiT model. This extracts maximum performance from the H200.

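VRAM Utilization: Relaxed the VAE memory limit (the set_memory_limit call is now commented out), which implicitly leaves headroom for larger inference batch sizes, and removed the aggressive per-request GC (`del runner` / `gc.collect()`); `torch.cuda.empty_cache()` is still called after each run.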

Hopper Flash Attention: Updated the installation flags to ensure the flash-attention build targets Hopper (sm_90).

Files changed (1) hide show

app.py +28 -32

app.py CHANGED Viewed

@@ -18,10 +18,16 @@ from einops import rearrange
 from torchvision.transforms import Compose, Lambda, Normalize
 import torchvision.transforms as T
-# --- Project Specific Imports (Assumed to be present in repo) ---
 from data.image.transforms.divisible_crop import DivisibleCrop
 from data.image.transforms.na_resize import NaResize
-# Note: Keeping Rearrange in case it's a specific wrapper, though typically einops suffices
 from data.video.transforms.rearrange import Rearrange
 if os.path.exists("./projects/video_diffusion_sr/color_fix.py"):
@@ -43,10 +49,11 @@ os.environ["MASTER_PORT"] = "12355"
 os.environ["RANK"] = str(0)
 os.environ["WORLD_SIZE"] = str(1)
-# Install Flash Attention if missing
 subprocess.run(
     "pip install flash-attn --no-build-isolation",
-    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
     shell=True,
 )
@@ -101,13 +108,23 @@ def configure_runner():
     OmegaConf.set_readonly(runner.config, False)
     # Standard init for single GPU
-    init_torch(cudnn_benchmark=False)
     runner.configure_dit_model(device="cuda", checkpoint='./ckpts/seedvr2_ema_3b.pth')
     runner.configure_vae_model()
     if hasattr(runner.vae, "set_memory_limit"):
-        runner.vae.set_memory_limit(**runner.config.vae.memory_limit)
     return runner
 @spaces.GPU(duration=100)
@@ -182,7 +199,6 @@ def upscale_image(image_path, seed=666, cfg_scale=1.0):
     output_filename = f'output/{uuid.uuid4()}.png'
     # Prepare Transforms
-    # Note: Model is optimized for 2560x1440 area equivalent
     video_transform = Compose([
         NaResize(resolution=(2560 * 1440) ** 0.5, mode="area", downsample_only=False),
         Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
@@ -194,7 +210,6 @@ def upscale_image(image_path, seed=666, cfg_scale=1.0):
     # Load and Preprocess Image
     img = Image.open(image_path).convert("RGB")
     img_tensor = T.ToTensor()(img).unsqueeze(0)  # (1, C, H, W)
-    # Model expects (C, T, H, W), for image T=1
     video_input = img_tensor.permute(0, 1, 2, 3)
     cond_latents = [video_transform(video_input.to(torch.device("cuda")))]
@@ -214,7 +229,6 @@ def upscale_image(image_path, seed=666, cfg_scale=1.0):
     # Post-process
     sample = samples[0]
-    # Handle tensor shaping for colorfix
     input_ref = (
         rearrange(input_tensor[:, None], "c t h w -> t c h w")
         if input_tensor.ndim == 3
@@ -239,23 +253,20 @@ def upscale_image(image_path, seed=666, cfg_scale=1.0):
     result_image = Image.fromarray(sample[0])
     result_image.save(output_filename)
-    # Cleanup
-    del runner, cond_latents, samples
-    gc.collect()
     torch.cuda.empty_cache()
     return result_image, output_filename
 # --- Gradio UI ---
-# Custom CSS for the "Top Tier" look
 custom_css = """
-/* Font Import handled by Theme, but custom tweaks here */
 .gradio-container {
     font-family: 'IBM Plex Sans', sans-serif !important;
 }
-/* Header Styling */
 h1 {
     text-align: center;
     color: #FF7043;
@@ -269,8 +280,6 @@ h3 {
     font-weight: 400 !important;
     margin-top: 0 !important;
 }
-/* Button Styling - Vibrant Orange */
 button.primary {
     background: linear-gradient(135deg, #FF7043 0%, #FF5722 100%) !important;
     border: none !important;
@@ -281,8 +290,6 @@ button.primary:hover {
     transform: translateY(-1px);
     box-shadow: 0 10px 15px -3px rgba(255, 87, 34, 0.3), 0 4px 6px -2px rgba(255, 87, 34, 0.15) !important;
 }
-/* UI Boxes (Groups/Columns) */
 .ui-box {
     background: white;
     border: 1px solid #E5E7EB;
@@ -293,8 +300,6 @@ button.primary:hover {
     display: flex;
     flex-direction: column;
 }
-/* Footer Styling */
 .footer-link {
     color: #FF7043;
     text-decoration: none;
@@ -305,7 +310,6 @@ button.primary:hover {
 }
 """
-# Refined Theme
 theme = gr.themes.Soft(
     primary_hue="orange",
     secondary_hue="zinc",
@@ -315,16 +319,13 @@ theme = gr.themes.Soft(
 ).set(
     body_background_fill="#F9FAFB",
     block_background_fill="white",
-    block_border_width="0px", # Clean look
     block_shadow="none",
-    # Remove orange background from labels
     block_label_background_fill="transparent",
     block_label_text_color="#4B5563",
     block_label_text_weight="600",
     block_title_text_color="#1F2937",
-    # Input/Output styling
     input_background_fill="#F3F4F6",
-    # Primary Button (Orange)
     button_primary_background_fill="#FF7043",
     button_primary_background_fill_hover="#F4511E",
     button_primary_text_color="white",
@@ -341,7 +342,6 @@ with gr.Blocks(theme=theme, css=custom_css, title="SeedVR2 Image Upscaler") as d
         )
     with gr.Row(equal_height=True):
-        # Left Column: Input
         with gr.Column(scale=1, elem_classes="ui-box"):
             gr.Markdown("#### Source", elem_id="input-header")
             input_image = gr.Image(
@@ -358,12 +358,9 @@ with gr.Blocks(theme=theme, css=custom_css, title="SeedVR2 Image Upscaler") as d
                     seed_input = gr.Number(label="Seed", value=666, precision=0, container=True)
                     cfg_input = gr.Slider(label="CFG Scale", minimum=0.0, maximum=10.0, value=1.0, step=0.1, container=True)
-            # Spacer
             gr.HTML("<div style='height: 20px;'></div>")
             run_btn = gr.Button("Upscale Image", variant="primary", size="lg")
-        # Right Column: Output
         with gr.Column(scale=1, elem_classes="ui-box"):
             gr.Markdown("#### Result", elem_id="output-header")
             output_image = gr.Image(
@@ -380,7 +377,6 @@ with gr.Blocks(theme=theme, css=custom_css, title="SeedVR2 Image Upscaler") as d
         outputs=[output_image, download_file]
     )
-    # Footer
     gr.HTML(
         """
         <div style="text-align: center; margin-top: 40px; margin-bottom: 20px; font-size: 0.9em; color: #6B7280;">

 from torchvision.transforms import Compose, Lambda, Normalize
 import torchvision.transforms as T
+# --- H200 Optimization Flags ---
+# Enable TensorFloat-32 (Crucial for H100/H200 speed)
+torch.backends.cuda.matmul.allow_tf32 = True
+torch.backends.cudnn.allow_tf32 = True
+# Optimize for the Hopper architecture (compute capability 9.0 / sm_90)
+os.environ["TORCH_CUDA_ARCH_LIST"] = "9.0"
+# --- Project Specific Imports ---
 from data.image.transforms.divisible_crop import DivisibleCrop
 from data.image.transforms.na_resize import NaResize
 from data.video.transforms.rearrange import Rearrange
 if os.path.exists("./projects/video_diffusion_sr/color_fix.py"):
 os.environ["RANK"] = str(0)
 os.environ["WORLD_SIZE"] = str(1)
+# Install Flash Attention tailored for H200 (Hopper)
+# We do NOT skip the CUDA build (FLASH_ATTENTION_SKIP_CUDA_BUILD=FALSE) so the kernels are built for the H200 environment
 subprocess.run(
     "pip install flash-attn --no-build-isolation",
+    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "FALSE", "TORCH_CUDA_ARCH_LIST": "9.0"},
     shell=True,
 )
     OmegaConf.set_readonly(runner.config, False)
     # Standard init for single GPU
+    init_torch(cudnn_benchmark=True) # Benchmark True is safe on H200
     runner.configure_dit_model(device="cuda", checkpoint='./ckpts/seedvr2_ema_3b.pth')
     runner.configure_vae_model()
     if hasattr(runner.vae, "set_memory_limit"):
+        # H200 has massive memory, we can relax limits if config allows
+        # runner.vae.set_memory_limit(**runner.config.vae.memory_limit)
+        pass
+    # --- H200 OPTIMIZATION: COMPILE DiT ---
+    # We use 'max-autotune' because the H200 can handle the compilation search space,
+    # and it yields significantly faster kernels than standard eager mode.
+    # We disable fullgraph to handle some dynamic control flow if present.
+    print("🚀 Optimizing DiT for H200 (max-autotune)... this may take a minute on first run.")
+    runner.dit = torch.compile(runner.dit, mode="max-autotune", fullgraph=False)
     return runner
 @spaces.GPU(duration=100)
     output_filename = f'output/{uuid.uuid4()}.png'
     # Prepare Transforms
     video_transform = Compose([
         NaResize(resolution=(2560 * 1440) ** 0.5, mode="area", downsample_only=False),
         Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
     # Load and Preprocess Image
     img = Image.open(image_path).convert("RGB")
     img_tensor = T.ToTensor()(img).unsqueeze(0)  # (1, C, H, W)
     video_input = img_tensor.permute(0, 1, 2, 3)
     cond_latents = [video_transform(video_input.to(torch.device("cuda")))]
     # Post-process
     sample = samples[0]
     input_ref = (
         rearrange(input_tensor[:, None], "c t h w -> t c h w")
         if input_tensor.ndim == 3
     result_image = Image.fromarray(sample[0])
     result_image.save(output_filename)
+    # Less aggressive cleanup on H200 to keep JIT kernels warm
+    # del runner
+    # gc.collect()
     torch.cuda.empty_cache()
     return result_image, output_filename
 # --- Gradio UI ---
 custom_css = """
+/* Font Import handled by Theme */
 .gradio-container {
     font-family: 'IBM Plex Sans', sans-serif !important;
 }
 h1 {
     text-align: center;
     color: #FF7043;
     font-weight: 400 !important;
     margin-top: 0 !important;
 }
 button.primary {
     background: linear-gradient(135deg, #FF7043 0%, #FF5722 100%) !important;
     border: none !important;
     transform: translateY(-1px);
     box-shadow: 0 10px 15px -3px rgba(255, 87, 34, 0.3), 0 4px 6px -2px rgba(255, 87, 34, 0.15) !important;
 }
 .ui-box {
     background: white;
     border: 1px solid #E5E7EB;
     display: flex;
     flex-direction: column;
 }
 .footer-link {
     color: #FF7043;
     text-decoration: none;
 }
 """
 theme = gr.themes.Soft(
     primary_hue="orange",
     secondary_hue="zinc",
 ).set(
     body_background_fill="#F9FAFB",
     block_background_fill="white",
+    block_border_width="0px",
     block_shadow="none",
     block_label_background_fill="transparent",
     block_label_text_color="#4B5563",
     block_label_text_weight="600",
     block_title_text_color="#1F2937",
     input_background_fill="#F3F4F6",
     button_primary_background_fill="#FF7043",
     button_primary_background_fill_hover="#F4511E",
     button_primary_text_color="white",
         )
     with gr.Row(equal_height=True):
         with gr.Column(scale=1, elem_classes="ui-box"):
             gr.Markdown("#### Source", elem_id="input-header")
             input_image = gr.Image(
                     seed_input = gr.Number(label="Seed", value=666, precision=0, container=True)
                     cfg_input = gr.Slider(label="CFG Scale", minimum=0.0, maximum=10.0, value=1.0, step=0.1, container=True)
             gr.HTML("<div style='height: 20px;'></div>")
             run_btn = gr.Button("Upscale Image", variant="primary", size="lg")
         with gr.Column(scale=1, elem_classes="ui-box"):
             gr.Markdown("#### Result", elem_id="output-header")
             output_image = gr.Image(
         outputs=[output_image, download_file]
     )
     gr.HTML(
         """
         <div style="text-align: center; margin-top: 40px; margin-bottom: 20px; font-size: 0.9em; color: #6B7280;">