Spaces:
Running
on
Zero
Running
on
Zero
Reverting back; an error was encountered.
Browse files
app.py
CHANGED
|
@@ -18,16 +18,10 @@ from einops import rearrange
|
|
| 18 |
from torchvision.transforms import Compose, Lambda, Normalize
|
| 19 |
import torchvision.transforms as T
|
| 20 |
|
| 21 |
-
# ---
|
| 22 |
-
# Enable TensorFloat-32 (Crucial for H100/H200 speed)
|
| 23 |
-
torch.backends.cuda.matmul.allow_tf32 = True
|
| 24 |
-
torch.backends.cudnn.allow_tf32 = True
|
| 25 |
-
# optimizing for Hopper architecture
|
| 26 |
-
os.environ["TORCH_CUDA_ARCH_LIST"] = "9.0"
|
| 27 |
-
|
| 28 |
-
# --- Project Specific Imports ---
|
| 29 |
from data.image.transforms.divisible_crop import DivisibleCrop
|
| 30 |
from data.image.transforms.na_resize import NaResize
|
|
|
|
| 31 |
from data.video.transforms.rearrange import Rearrange
|
| 32 |
|
| 33 |
if os.path.exists("./projects/video_diffusion_sr/color_fix.py"):
|
|
@@ -49,11 +43,10 @@ os.environ["MASTER_PORT"] = "12355"
|
|
| 49 |
os.environ["RANK"] = str(0)
|
| 50 |
os.environ["WORLD_SIZE"] = str(1)
|
| 51 |
|
| 52 |
-
# Install Flash Attention
|
| 53 |
-
# We skip the build check to force it to look at the H200 environment
|
| 54 |
subprocess.run(
|
| 55 |
"pip install flash-attn --no-build-isolation",
|
| 56 |
-
env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
|
| 57 |
shell=True,
|
| 58 |
)
|
| 59 |
|
|
@@ -108,23 +101,13 @@ def configure_runner():
|
|
| 108 |
OmegaConf.set_readonly(runner.config, False)
|
| 109 |
|
| 110 |
# Standard init for single GPU
|
| 111 |
-
init_torch(cudnn_benchmark=True)
|
| 112 |
|
| 113 |
runner.configure_dit_model(device="cuda", checkpoint='./ckpts/seedvr2_ema_3b.pth')
|
| 114 |
runner.configure_vae_model()
|
| 115 |
|
| 116 |
if hasattr(runner.vae, "set_memory_limit"):
|
| 117 |
-
|
| 118 |
-
# runner.vae.set_memory_limit(**runner.config.vae.memory_limit)
|
| 119 |
-
pass
|
| 120 |
-
|
| 121 |
-
# --- H200 OPTIMIZATION: COMPILE DiT ---
|
| 122 |
-
# We use 'max-autotune' because H200 can handle the compilation search space
|
| 123 |
-
# and results in significantly faster kernels than standard eager mode.
|
| 124 |
-
# We disable fullgraph to handle some dynamic control flow if present.
|
| 125 |
-
print("🚀 Optimizing DiT for H200 (max-autotune)... this may take a minute on first run.")
|
| 126 |
-
runner.dit = torch.compile(runner.dit, mode="max-autotune", fullgraph=False)
|
| 127 |
-
|
| 128 |
return runner
|
| 129 |
|
| 130 |
@spaces.GPU(duration=100)
|
|
@@ -199,6 +182,7 @@ def upscale_image(image_path, seed=666, cfg_scale=1.0):
|
|
| 199 |
output_filename = f'output/{uuid.uuid4()}.png'
|
| 200 |
|
| 201 |
# Prepare Transforms
|
|
|
|
| 202 |
video_transform = Compose([
|
| 203 |
NaResize(resolution=(2560 * 1440) ** 0.5, mode="area", downsample_only=False),
|
| 204 |
Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
|
|
@@ -210,6 +194,7 @@ def upscale_image(image_path, seed=666, cfg_scale=1.0):
|
|
| 210 |
# Load and Preprocess Image
|
| 211 |
img = Image.open(image_path).convert("RGB")
|
| 212 |
img_tensor = T.ToTensor()(img).unsqueeze(0) # (1, C, H, W)
|
|
|
|
| 213 |
video_input = img_tensor.permute(0, 1, 2, 3)
|
| 214 |
|
| 215 |
cond_latents = [video_transform(video_input.to(torch.device("cuda")))]
|
|
@@ -229,6 +214,7 @@ def upscale_image(image_path, seed=666, cfg_scale=1.0):
|
|
| 229 |
# Post-process
|
| 230 |
sample = samples[0]
|
| 231 |
|
|
|
|
| 232 |
input_ref = (
|
| 233 |
rearrange(input_tensor[:, None], "c t h w -> t c h w")
|
| 234 |
if input_tensor.ndim == 3
|
|
@@ -253,20 +239,22 @@ def upscale_image(image_path, seed=666, cfg_scale=1.0):
|
|
| 253 |
result_image = Image.fromarray(sample[0])
|
| 254 |
result_image.save(output_filename)
|
| 255 |
|
| 256 |
-
#
|
| 257 |
-
|
| 258 |
-
|
| 259 |
torch.cuda.empty_cache()
|
| 260 |
|
| 261 |
return result_image, output_filename
|
| 262 |
|
| 263 |
# --- Gradio UI ---
|
| 264 |
|
|
|
|
| 265 |
custom_css = """
|
| 266 |
-
/* Font Import handled by Theme */
|
| 267 |
.gradio-container {
|
| 268 |
font-family: 'IBM Plex Sans', sans-serif !important;
|
| 269 |
}
|
|
|
|
| 270 |
h1 {
|
| 271 |
text-align: center;
|
| 272 |
color: #FF7043;
|
|
@@ -280,6 +268,7 @@ h3 {
|
|
| 280 |
font-weight: 400 !important;
|
| 281 |
margin-top: 0 !important;
|
| 282 |
}
|
|
|
|
| 283 |
button.primary {
|
| 284 |
background: linear-gradient(135deg, #FF7043 0%, #FF5722 100%) !important;
|
| 285 |
border: none !important;
|
|
@@ -290,6 +279,7 @@ button.primary:hover {
|
|
| 290 |
transform: translateY(-1px);
|
| 291 |
box-shadow: 0 10px 15px -3px rgba(255, 87, 34, 0.3), 0 4px 6px -2px rgba(255, 87, 34, 0.15) !important;
|
| 292 |
}
|
|
|
|
| 293 |
.ui-box {
|
| 294 |
background: white;
|
| 295 |
border: 1px solid #E5E7EB;
|
|
@@ -300,6 +290,7 @@ button.primary:hover {
|
|
| 300 |
display: flex;
|
| 301 |
flex-direction: column;
|
| 302 |
}
|
|
|
|
| 303 |
.footer-link {
|
| 304 |
color: #FF7043;
|
| 305 |
text-decoration: none;
|
|
@@ -310,6 +301,7 @@ button.primary:hover {
|
|
| 310 |
}
|
| 311 |
"""
|
| 312 |
|
|
|
|
| 313 |
theme = gr.themes.Soft(
|
| 314 |
primary_hue="orange",
|
| 315 |
secondary_hue="zinc",
|
|
@@ -319,13 +311,16 @@ theme = gr.themes.Soft(
|
|
| 319 |
).set(
|
| 320 |
body_background_fill="#F9FAFB",
|
| 321 |
block_background_fill="white",
|
| 322 |
-
block_border_width="0px",
|
| 323 |
block_shadow="none",
|
|
|
|
| 324 |
block_label_background_fill="transparent",
|
| 325 |
block_label_text_color="#4B5563",
|
| 326 |
block_label_text_weight="600",
|
| 327 |
block_title_text_color="#1F2937",
|
|
|
|
| 328 |
input_background_fill="#F3F4F6",
|
|
|
|
| 329 |
button_primary_background_fill="#FF7043",
|
| 330 |
button_primary_background_fill_hover="#F4511E",
|
| 331 |
button_primary_text_color="white",
|
|
@@ -342,6 +337,7 @@ with gr.Blocks(theme=theme, css=custom_css, title="SeedVR2 Image Upscaler") as d
|
|
| 342 |
)
|
| 343 |
|
| 344 |
with gr.Row(equal_height=True):
|
|
|
|
| 345 |
with gr.Column(scale=1, elem_classes="ui-box"):
|
| 346 |
gr.Markdown("#### Source", elem_id="input-header")
|
| 347 |
input_image = gr.Image(
|
|
@@ -358,9 +354,12 @@ with gr.Blocks(theme=theme, css=custom_css, title="SeedVR2 Image Upscaler") as d
|
|
| 358 |
seed_input = gr.Number(label="Seed", value=666, precision=0, container=True)
|
| 359 |
cfg_input = gr.Slider(label="CFG Scale", minimum=0.0, maximum=10.0, value=1.0, step=0.1, container=True)
|
| 360 |
|
|
|
|
| 361 |
gr.HTML("<div style='height: 20px;'></div>")
|
|
|
|
| 362 |
run_btn = gr.Button("Upscale Image", variant="primary", size="lg")
|
| 363 |
|
|
|
|
| 364 |
with gr.Column(scale=1, elem_classes="ui-box"):
|
| 365 |
gr.Markdown("#### Result", elem_id="output-header")
|
| 366 |
output_image = gr.Image(
|
|
@@ -377,6 +376,7 @@ with gr.Blocks(theme=theme, css=custom_css, title="SeedVR2 Image Upscaler") as d
|
|
| 377 |
outputs=[output_image, download_file]
|
| 378 |
)
|
| 379 |
|
|
|
|
| 380 |
gr.HTML(
|
| 381 |
"""
|
| 382 |
<div style="text-align: center; margin-top: 40px; margin-bottom: 20px; font-size: 0.9em; color: #6B7280;">
|
|
|
|
| 18 |
from torchvision.transforms import Compose, Lambda, Normalize
|
| 19 |
import torchvision.transforms as T
|
| 20 |
|
| 21 |
+
# --- Project Specific Imports (Assumed to be present in repo) ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
from data.image.transforms.divisible_crop import DivisibleCrop
|
| 23 |
from data.image.transforms.na_resize import NaResize
|
| 24 |
+
# Note: Keeping Rearrange in case it's a specific wrapper, though typically einops suffices
|
| 25 |
from data.video.transforms.rearrange import Rearrange
|
| 26 |
|
| 27 |
if os.path.exists("./projects/video_diffusion_sr/color_fix.py"):
|
|
|
|
| 43 |
os.environ["RANK"] = str(0)
|
| 44 |
os.environ["WORLD_SIZE"] = str(1)
|
| 45 |
|
| 46 |
+
# Install Flash Attention if missing
|
|
|
|
| 47 |
subprocess.run(
|
| 48 |
"pip install flash-attn --no-build-isolation",
|
| 49 |
+
env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
|
| 50 |
shell=True,
|
| 51 |
)
|
| 52 |
|
|
|
|
| 101 |
OmegaConf.set_readonly(runner.config, False)
|
| 102 |
|
| 103 |
# Standard init for single GPU
|
| 104 |
+
init_torch(cudnn_benchmark=False)
|
| 105 |
|
| 106 |
runner.configure_dit_model(device="cuda", checkpoint='./ckpts/seedvr2_ema_3b.pth')
|
| 107 |
runner.configure_vae_model()
|
| 108 |
|
| 109 |
if hasattr(runner.vae, "set_memory_limit"):
|
| 110 |
+
runner.vae.set_memory_limit(**runner.config.vae.memory_limit)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
return runner
|
| 112 |
|
| 113 |
@spaces.GPU(duration=100)
|
|
|
|
| 182 |
output_filename = f'output/{uuid.uuid4()}.png'
|
| 183 |
|
| 184 |
# Prepare Transforms
|
| 185 |
+
# Note: Model is optimized for 2560x1440 area equivalent
|
| 186 |
video_transform = Compose([
|
| 187 |
NaResize(resolution=(2560 * 1440) ** 0.5, mode="area", downsample_only=False),
|
| 188 |
Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
|
|
|
|
| 194 |
# Load and Preprocess Image
|
| 195 |
img = Image.open(image_path).convert("RGB")
|
| 196 |
img_tensor = T.ToTensor()(img).unsqueeze(0) # (1, C, H, W)
|
| 197 |
+
# Model expects (C, T, H, W), for image T=1
|
| 198 |
video_input = img_tensor.permute(0, 1, 2, 3)
|
| 199 |
|
| 200 |
cond_latents = [video_transform(video_input.to(torch.device("cuda")))]
|
|
|
|
| 214 |
# Post-process
|
| 215 |
sample = samples[0]
|
| 216 |
|
| 217 |
+
# Handle tensor shaping for colorfix
|
| 218 |
input_ref = (
|
| 219 |
rearrange(input_tensor[:, None], "c t h w -> t c h w")
|
| 220 |
if input_tensor.ndim == 3
|
|
|
|
| 239 |
result_image = Image.fromarray(sample[0])
|
| 240 |
result_image.save(output_filename)
|
| 241 |
|
| 242 |
+
# Cleanup
|
| 243 |
+
del runner, cond_latents, samples
|
| 244 |
+
gc.collect()
|
| 245 |
torch.cuda.empty_cache()
|
| 246 |
|
| 247 |
return result_image, output_filename
|
| 248 |
|
| 249 |
# --- Gradio UI ---
|
| 250 |
|
| 251 |
+
# Custom CSS for the "Top Tier" look
|
| 252 |
custom_css = """
|
| 253 |
+
/* Font Import handled by Theme, but custom tweaks here */
|
| 254 |
.gradio-container {
|
| 255 |
font-family: 'IBM Plex Sans', sans-serif !important;
|
| 256 |
}
|
| 257 |
+
/* Header Styling */
|
| 258 |
h1 {
|
| 259 |
text-align: center;
|
| 260 |
color: #FF7043;
|
|
|
|
| 268 |
font-weight: 400 !important;
|
| 269 |
margin-top: 0 !important;
|
| 270 |
}
|
| 271 |
+
/* Button Styling - Vibrant Orange */
|
| 272 |
button.primary {
|
| 273 |
background: linear-gradient(135deg, #FF7043 0%, #FF5722 100%) !important;
|
| 274 |
border: none !important;
|
|
|
|
| 279 |
transform: translateY(-1px);
|
| 280 |
box-shadow: 0 10px 15px -3px rgba(255, 87, 34, 0.3), 0 4px 6px -2px rgba(255, 87, 34, 0.15) !important;
|
| 281 |
}
|
| 282 |
+
/* UI Boxes (Groups/Columns) */
|
| 283 |
.ui-box {
|
| 284 |
background: white;
|
| 285 |
border: 1px solid #E5E7EB;
|
|
|
|
| 290 |
display: flex;
|
| 291 |
flex-direction: column;
|
| 292 |
}
|
| 293 |
+
/* Footer Styling */
|
| 294 |
.footer-link {
|
| 295 |
color: #FF7043;
|
| 296 |
text-decoration: none;
|
|
|
|
| 301 |
}
|
| 302 |
"""
|
| 303 |
|
| 304 |
+
# Refined Theme
|
| 305 |
theme = gr.themes.Soft(
|
| 306 |
primary_hue="orange",
|
| 307 |
secondary_hue="zinc",
|
|
|
|
| 311 |
).set(
|
| 312 |
body_background_fill="#F9FAFB",
|
| 313 |
block_background_fill="white",
|
| 314 |
+
block_border_width="0px", # Clean look
|
| 315 |
block_shadow="none",
|
| 316 |
+
# Remove orange background from labels
|
| 317 |
block_label_background_fill="transparent",
|
| 318 |
block_label_text_color="#4B5563",
|
| 319 |
block_label_text_weight="600",
|
| 320 |
block_title_text_color="#1F2937",
|
| 321 |
+
# Input/Output styling
|
| 322 |
input_background_fill="#F3F4F6",
|
| 323 |
+
# Primary Button (Orange)
|
| 324 |
button_primary_background_fill="#FF7043",
|
| 325 |
button_primary_background_fill_hover="#F4511E",
|
| 326 |
button_primary_text_color="white",
|
|
|
|
| 337 |
)
|
| 338 |
|
| 339 |
with gr.Row(equal_height=True):
|
| 340 |
+
# Left Column: Input
|
| 341 |
with gr.Column(scale=1, elem_classes="ui-box"):
|
| 342 |
gr.Markdown("#### Source", elem_id="input-header")
|
| 343 |
input_image = gr.Image(
|
|
|
|
| 354 |
seed_input = gr.Number(label="Seed", value=666, precision=0, container=True)
|
| 355 |
cfg_input = gr.Slider(label="CFG Scale", minimum=0.0, maximum=10.0, value=1.0, step=0.1, container=True)
|
| 356 |
|
| 357 |
+
# Spacer
|
| 358 |
gr.HTML("<div style='height: 20px;'></div>")
|
| 359 |
+
|
| 360 |
run_btn = gr.Button("Upscale Image", variant="primary", size="lg")
|
| 361 |
|
| 362 |
+
# Right Column: Output
|
| 363 |
with gr.Column(scale=1, elem_classes="ui-box"):
|
| 364 |
gr.Markdown("#### Result", elem_id="output-header")
|
| 365 |
output_image = gr.Image(
|
|
|
|
| 376 |
outputs=[output_image, download_file]
|
| 377 |
)
|
| 378 |
|
| 379 |
+
# Footer
|
| 380 |
gr.HTML(
|
| 381 |
"""
|
| 382 |
<div style="text-align: center; margin-top: 40px; margin-bottom: 20px; font-size: 0.9em; color: #6B7280;">
|