Spaces:

multimodalart
/

Helios-Distilled

Running on Zero

App Files Files Community

multimodalart HF Staff committed 1 day ago

Commit

9f13b69

verified ·

1 Parent(s): 4dc6132

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -71

app.py CHANGED Viewed

@@ -4,14 +4,14 @@ import sys
 import time
 import tempfile
 import zipfile
 # ---------------------------------------------------------------------------
-# Install private diffusers fork from bundled zip before anything imports it
 # ---------------------------------------------------------------------------
 _APP_DIR = os.path.dirname(os.path.abspath(__file__))
 ZIP_PATH = os.path.join(_APP_DIR, "helios_diffusers.zip")
 EXTRACT_DIR = os.path.join(_APP_DIR, "_helios_diffusers")
 _PKG_ROOT = os.path.join(EXTRACT_DIR, "diffusers-new-model-addition-helios-helios")
 if not os.path.isdir(_PKG_ROOT):
@@ -25,16 +25,12 @@ try:
 except subprocess.CalledProcessError as e:
     print(f"[setup] pip install failed (exit {e.returncode}), falling back to sys.path")
-# Always ensure the src-layout package is importable
 _SRC_DIR = os.path.join(_PKG_ROOT, "src")
 if os.path.isdir(_SRC_DIR):
     sys.path.insert(0, _SRC_DIR)
-    print(f"[setup] Added {_SRC_DIR} to sys.path")
 import gradio as gr
 import spaces
-import torch
 from diffusers import (
     AutoencoderKLWan,
     HeliosPyramidPipeline,
@@ -43,7 +39,7 @@ from diffusers import (
 from diffusers.utils import export_to_video, load_image, load_video
 # ---------------------------------------------------------------------------
-# Pre-load model at import time (cached on ZeroGPU)
 # ---------------------------------------------------------------------------
 MODEL_ID = "BestWishYsh/Helios-Distilled"
@@ -58,13 +54,53 @@ pipe = HeliosPyramidPipeline.from_pretrained(
 )
 pipe.to("cuda")
-compiled_transformer = spaces.aoti_load("helios_distilled_transformer.pt2")
-spaces.aoti_apply(compiled_transformer, pipe.transformer)
-#pipe.transformer.set_attention_backend("_flash_3_hub")
 # ---------------------------------------------------------------------------
-# Generation (decorated for ZeroGPU)
 # ---------------------------------------------------------------------------
 @spaces.GPU(duration=300)
 def generate_video(
@@ -80,7 +116,6 @@ def generate_video(
     is_amplify_first_chunk: bool,
     progress=gr.Progress(track_tqdm=True),
 ):
-    """Run the Helios-Distilled pipeline and return the generated video."""
     if not prompt:
         raise gr.Error("Please provide a prompt.")
@@ -102,7 +137,6 @@ def generate_video(
         "is_amplify_first_chunk": is_amplify_first_chunk,
     }
-    # Conditional inputs
     if mode == "Image-to-Video" and image_input is not None:
         img = load_image(image_input).resize((int(width), int(height)))
         kwargs["image"] = img
@@ -118,12 +152,10 @@ def generate_video(
     info = f"Generated in {elapsed:.1f}s · {num_frames} frames · {height}×{width}"
     return tmp.name, info
 # ---------------------------------------------------------------------------
-# Visibility toggle for conditional inputs
 # ---------------------------------------------------------------------------
 def update_conditional_visibility(mode):
-    """Show image input for I2V, video input for V2V, hide both for T2V."""
     if mode == "Image-to-Video":
         return gr.update(visible=True), gr.update(visible=False)
     elif mode == "Video-to-Video":
@@ -131,14 +163,9 @@ def update_conditional_visibility(mode):
     else:
         return gr.update(visible=False), gr.update(visible=False)
-# ---------------------------------------------------------------------------
-# Gradio UI
-# ---------------------------------------------------------------------------
 CSS = """
 #header { text-align: center; margin-bottom: 0.5em; }
 #header h1 { font-size: 2.2em; margin-bottom: 0; }
-#header p { opacity: 0.7; margin-top: 0.2em; }
 .contain { max-width: 1350px; margin: 0 auto !important; }
 """
@@ -147,88 +174,48 @@ with gr.Blocks(css=CSS, title="Helios Video Generation", theme=gr.themes.Soft())
         """
         <div id="header">
             <h1>🎬 Helios 14B distilled</h1>
-            <p></p>
         </div>
         """
     )
     with gr.Row():
-        # ---- Left column: Controls ----
         with gr.Column(scale=1):
             mode = gr.Radio(
                 choices=["Text-to-Video", "Image-to-Video", "Video-to-Video"],
                 value="Text-to-Video",
                 label="Generation Mode",
             )
-            # Conditional inputs placed above the prompt, visible based on mode
-            image_input = gr.Image(
-                label="Image (for I2V)", type="filepath", visible=False
-            )
-            video_input = gr.Video(
-                label="Video (for V2V)", visible=False
-            )
             prompt = gr.Textbox(
                 label="Prompt",
                 lines=4,
-                placeholder="Describe the video you want to generate…",
-                value=(
-                    "A vibrant tropical fish swimming gracefully among colorful coral "
-                    "reefs in a clear, turquoise ocean. The fish has bright blue and yellow "
-                    "scales with a small, distinctive orange spot on its side, its fins "
-                    "moving fluidly. A close-up shot with dynamic movement."
-                ),
             )
             with gr.Accordion("Advanced Settings", open=False):
                 with gr.Row():
                     height = gr.Number(value=384, label="Height", precision=0, interactive=False)
                     width = gr.Number(value=640, label="Width", precision=0, interactive=False)
                 with gr.Row():
-                    num_frames = gr.Slider(33, 240, value=33, step=33, label="Num Frames (must be multiple of 33)")
-                    num_inference_steps = gr.Slider(
-                        1, 10, value=2, step=1, label="Steps (per pyramid stage)"
-                    )
                 with gr.Row():
                     seed = gr.Number(value=42, label="Seed", precision=0)
-                    is_amplify_first_chunk = gr.Checkbox(
-                        label="Amplify First Chunk", value=True
-                    )
             generate_btn = gr.Button("🚀 Generate Video", variant="primary", size="lg")
-        # ---- Right column: Output ----
         with gr.Column(scale=1):
             video_output = gr.Video(label="Generated Video", autoplay=True)
             info_output = gr.Textbox(label="Info", interactive=False)
-    # ---- Toggle conditional input visibility on mode change ----
-    mode.change(
-        fn=update_conditional_visibility,
-        inputs=[mode],
-        outputs=[image_input, video_input],
-    )
-    # ---- Generation ----
     generate_btn.click(
         fn=generate_video,
-        inputs=[
-            mode,
-            prompt,
-            image_input,
-            video_input,
-            height,
-            width,
-            num_frames,
-            num_inference_steps,
-            seed,
-            is_amplify_first_chunk,
-        ],
         outputs=[video_output, info_output],
     )
-    # ---- Examples ----
     gr.Examples(
         examples=[
             [
@@ -257,6 +244,5 @@ with gr.Blocks(css=CSS, title="Helios Video Generation", theme=gr.themes.Soft())
         label="Example Prompts",
     )
 if __name__ == "__main__":
     demo.launch()

 import time
 import tempfile
 import zipfile
+import torch
 # ---------------------------------------------------------------------------
+# Install private diffusers fork
 # ---------------------------------------------------------------------------
 _APP_DIR = os.path.dirname(os.path.abspath(__file__))
 ZIP_PATH = os.path.join(_APP_DIR, "helios_diffusers.zip")
 EXTRACT_DIR = os.path.join(_APP_DIR, "_helios_diffusers")
 _PKG_ROOT = os.path.join(EXTRACT_DIR, "diffusers-new-model-addition-helios-helios")
 if not os.path.isdir(_PKG_ROOT):
 except subprocess.CalledProcessError as e:
     print(f"[setup] pip install failed (exit {e.returncode}), falling back to sys.path")
 _SRC_DIR = os.path.join(_PKG_ROOT, "src")
 if os.path.isdir(_SRC_DIR):
     sys.path.insert(0, _SRC_DIR)
 import gradio as gr
 import spaces
 from diffusers import (
     AutoencoderKLWan,
     HeliosPyramidPipeline,
 from diffusers.utils import export_to_video, load_image, load_video
 # ---------------------------------------------------------------------------
+# Pre-load model
 # ---------------------------------------------------------------------------
 MODEL_ID = "BestWishYsh/Helios-Distilled"
 )
 pipe.to("cuda")
+# ---------------------------------------------------------------------------
+# 🔥 AOT LOADING LOGIC 🔥
+# ---------------------------------------------------------------------------
+AOT_FILENAME = "helios_distilled_transformer.pt2"
+AOT_PATH = os.path.join(_APP_DIR, AOT_FILENAME)
+def load_aot_model(path, original_module):
+    """
+    Loads a raw AOTI package (.pt2) and patches the original module.
+    """
+    print(f"[AOT] Loading AOTI package from {path}...")
+    # 1. Load the compiled runner
+    # aoti_load_package returns a callable wrapper around the compiled graph
+    # (documented as AOTICompiledModel; verify against the installed torch version).
+    compiled_model = torch._inductor.aoti_load_package(path)
+    # 2. Constants (weights): the package was exported with
+    # 'package_constants_on_disk': True, so the weights are expected to live inside the .pt2.
+    # A more defensive loader would map the constants explicitly; that is NOT done here.
+    # For now we rely on the loaded callable as-is.
+    # 3. Patch the forward method
+    # We create a wrapper to handle the call signature if needed,
+    # but AOTI usually preserves the signature of the exported graph.
+    original_module.forward = compiled_model
+    # 4. Clear old weights to save VRAM (optional but recommended)
+    # BE CAREFUL: This deletes the original weights. If AOT failed to embed them, this breaks things.
+    # Since we used default AOTI export, weights are embedded in the .so or .pt2
+    original_module.to("meta")
+    print("[AOT] Model patched successfully!")
+if os.path.exists(AOT_PATH):
+    try:
+        load_aot_model(AOT_PATH, pipe.transformer)
+    except Exception as e:
+        print(f"[AOT] ❌ Failed to load compiled graph: {e}")
+        # Restore device if failed
+        pipe.to("cuda")
+        pipe.transformer.set_attention_backend("_flash_3_hub")
+else:
+    print(f"[AOT] ⚠️ No compiled graph found at {AOT_PATH}.")
+    pipe.transformer.set_attention_backend("_flash_3_hub")
 # ---------------------------------------------------------------------------
+# Generation
 # ---------------------------------------------------------------------------
 @spaces.GPU(duration=300)
 def generate_video(
     is_amplify_first_chunk: bool,
     progress=gr.Progress(track_tqdm=True),
 ):
     if not prompt:
         raise gr.Error("Please provide a prompt.")
         "is_amplify_first_chunk": is_amplify_first_chunk,
     }
     if mode == "Image-to-Video" and image_input is not None:
         img = load_image(image_input).resize((int(width), int(height)))
         kwargs["image"] = img
     info = f"Generated in {elapsed:.1f}s · {num_frames} frames · {height}×{width}"
     return tmp.name, info
 # ---------------------------------------------------------------------------
+# UI Setup
 # ---------------------------------------------------------------------------
 def update_conditional_visibility(mode):
     if mode == "Image-to-Video":
         return gr.update(visible=True), gr.update(visible=False)
     elif mode == "Video-to-Video":
     else:
         return gr.update(visible=False), gr.update(visible=False)
 CSS = """
 #header { text-align: center; margin-bottom: 0.5em; }
 #header h1 { font-size: 2.2em; margin-bottom: 0; }
 .contain { max-width: 1350px; margin: 0 auto !important; }
 """
         """
         <div id="header">
             <h1>🎬 Helios 14B distilled</h1>
         </div>
         """
     )
     with gr.Row():
         with gr.Column(scale=1):
             mode = gr.Radio(
                 choices=["Text-to-Video", "Image-to-Video", "Video-to-Video"],
                 value="Text-to-Video",
                 label="Generation Mode",
             )
+            image_input = gr.Image(label="Image (for I2V)", type="filepath", visible=False)
+            video_input = gr.Video(label="Video (for V2V)", visible=False)
             prompt = gr.Textbox(
                 label="Prompt",
                 lines=4,
+                value="A vibrant tropical fish swimming gracefully...",
             )
             with gr.Accordion("Advanced Settings", open=False):
                 with gr.Row():
                     height = gr.Number(value=384, label="Height", precision=0, interactive=False)
                     width = gr.Number(value=640, label="Width", precision=0, interactive=False)
                 with gr.Row():
+                    num_frames = gr.Slider(33, 240, value=33, step=33, label="Num Frames")
+                    num_inference_steps = gr.Slider(1, 10, value=2, step=1, label="Steps per stage")
                 with gr.Row():
                     seed = gr.Number(value=42, label="Seed", precision=0)
+                    is_amplify_first_chunk = gr.Checkbox(label="Amplify First Chunk", value=True)
             generate_btn = gr.Button("🚀 Generate Video", variant="primary", size="lg")
         with gr.Column(scale=1):
             video_output = gr.Video(label="Generated Video", autoplay=True)
             info_output = gr.Textbox(label="Info", interactive=False)
+    mode.change(fn=update_conditional_visibility, inputs=[mode], outputs=[image_input, video_input])
     generate_btn.click(
         fn=generate_video,
+        inputs=[mode, prompt, image_input, video_input, height, width, num_frames, num_inference_steps, seed, is_amplify_first_chunk],
         outputs=[video_output, info_output],
     )
     gr.Examples(
         examples=[
             [
         label="Example Prompts",
     )
 if __name__ == "__main__":
     demo.launch()