multimodalart (HF Staff) committed
Commit 939c549 · verified · 1 Parent(s): ecc3183

Update app.py

Files changed (1)
  1. app.py +123 -72
app.py CHANGED
@@ -6,42 +6,43 @@ import datetime
 import numpy as np
 from PIL import Image
 import imageio
-import spaces
+import shutil
 
 # --- Part 1: Auto-Setup (Clone Repo & Download Weights) ---
 
 REPO_URL = "https://github.com/Tencent-Hunyuan/HunyuanVideo-1.5.git"
 REPO_DIR = os.path.abspath("HunyuanVideo-1.5")
-# Use Absolute Path to ensure the loader finds the folder
-MODEL_DIR = os.path.abspath("ckpts")
-HF_REPO_ID = "tencent/HunyuanVideo-1.5"
+MODEL_DIR = os.path.abspath("ckpts")
+
+# Repositories
+HF_MAIN_REPO = "tencent/HunyuanVideo-1.5"
+HF_GLYPH_REPO = "multimodalart/glyph-sdxl-v2-byt5-small"
 
 # Configuration
 TRANSFORMER_VERSION = "480p_i2v_distilled"
 DTYPE = torch.bfloat16
-ENABLE_OFFLOADING = True
+# ZeroGPU: Set False so we control offloading manually (CPU -> GPU -> CPU)
+ENABLE_OFFLOADING = False
 
 def setup_environment():
-    """Clones the repo and downloads weights if they don't exist."""
     print("=" * 50)
     print("Checking Environment & Dependencies...")
 
-    # 1. Clone Repository
+    # 1. Clone Code Repository
    if not os.path.exists(REPO_DIR):
         print(f"Cloning repository to {REPO_DIR}...")
         subprocess.run(["git", "clone", REPO_URL, REPO_DIR], check=True)
-    else:
-        print(f"Repository exists at {REPO_DIR}")
 
     # 2. Add Repo to Python Path
     if REPO_DIR not in sys.path:
         sys.path.insert(0, REPO_DIR)
 
-    # 3. Download Weights
-    # Check if key folders exist to verify download
-    transformer_path = os.path.join(MODEL_DIR, "transformer", TRANSFORMER_VERSION)
-    if not os.path.exists(transformer_path):
-        print(f"Downloading weights to {MODEL_DIR}...")
+    # 3. Download Main Weights
+    os.makedirs(MODEL_DIR, exist_ok=True)
+    target_transformer = os.path.join(MODEL_DIR, "transformer", TRANSFORMER_VERSION)
+
+    if not os.path.exists(target_transformer):
+        print(f"Downloading Main Weights from {HF_MAIN_REPO}...")
         try:
             from huggingface_hub import snapshot_download
             allow_patterns = [
@@ -53,92 +54,127 @@ def setup_environment():
                 "tokenizer/*"
             ]
             snapshot_download(
-                repo_id=HF_REPO_ID,
+                repo_id=HF_MAIN_REPO,
                 local_dir=MODEL_DIR,
-                allow_patterns=allow_patterns
+                allow_patterns=allow_patterns,
+                local_dir_use_symlinks=False
             )
-            print("Download complete.")
         except Exception as e:
-            print(f"Error downloading weights: {e}")
+            print(f"Error downloading main weights: {e}")
             sys.exit(1)
-    else:
-        print(f"Weights found in {MODEL_DIR}")
-
+
+    # 4. Download & Restructure Glyph Weights
+    # The pipeline expects: ckpts/text_encoder/Glyph-SDXL-v2/checkpoints/byt5_model.pt
+    glyph_root = os.path.join(MODEL_DIR, "text_encoder", "Glyph-SDXL-v2")
+    glyph_ckpt_target = os.path.join(glyph_root, "checkpoints", "byt5_model.pt")
+
+    if not os.path.exists(glyph_ckpt_target):
+        print(f"Downloading & Structuring Glyph Weights from {HF_GLYPH_REPO}...")
+        try:
+            from huggingface_hub import snapshot_download
+            # Download to a temp folder first
+            glyph_temp = os.path.join(MODEL_DIR, "glyph_temp")
+            snapshot_download(
+                repo_id=HF_GLYPH_REPO,
+                local_dir=glyph_temp,
+                local_dir_use_symlinks=False
+            )
+
+            # Create target structure
+            os.makedirs(os.path.join(glyph_root, "assets"), exist_ok=True)
+            os.makedirs(os.path.join(glyph_root, "checkpoints"), exist_ok=True)
+
+            # Move Assets (color_idx.json, etc.)
+            src_assets = os.path.join(glyph_temp, "assets")
+            if os.path.exists(src_assets):
+                for f in os.listdir(src_assets):
+                    shutil.copy(os.path.join(src_assets, f), os.path.join(glyph_root, "assets", f))
+
+            # Move & Rename Model (pytorch_model.bin -> byt5_model.pt)
+            # Try bin first, then safetensors (code usually loads via torch.load, so bin/pt is safer)
+            src_bin = os.path.join(glyph_temp, "pytorch_model.bin")
+            if os.path.exists(src_bin):
+                print(" moving pytorch_model.bin -> byt5_model.pt")
+                shutil.move(src_bin, glyph_ckpt_target)
+            else:
+                # Fallback if repo changes structure
+                print("Warning: pytorch_model.bin not found, looking for safetensors...")
+                src_safe = os.path.join(glyph_temp, "model.safetensors")
+                if os.path.exists(src_safe):
+                    # Note: Standard torch.load might fail on safetensors if code expects pickle,
+                    # but let's try.
+                    shutil.move(src_safe, glyph_ckpt_target)
+
+            # Clean up temp
+            shutil.rmtree(glyph_temp, ignore_errors=True)
+            print("Glyph setup complete.")
+
+        except Exception as e:
+            print(f"Error setting up Glyph weights: {e}")
+            # Don't exit, maybe the model can run without it if config tweaked,
+            # but likely it will fail later.
+            pass
+
     print("Environment Ready.")
     print("=" * 50)
 
-# Run setup immediately
 setup_environment()
 
-# --- Part 2: Imports from Cloned Repo ---
-
-# Set Env Vars for HyVideo
-if 'PYTORCH_CUDA_ALLOC_CONF' not in os.environ:
-    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
-# Even for single GPU, HyVideo code expects these env vars to be set
-os.environ['RANK'] = '0'
-os.environ['WORLD_SIZE'] = '1'
+# --- Part 2: Imports & Monkey Patching ---
 
+# 1. Import Modules explicitly for patching
 try:
-    from hyvideo.pipelines.hunyuan_video_pipeline import HunyuanVideo_1_5_Pipeline
-    from hyvideo.commons.infer_state import initialize_infer_state
-    # Import module for patching
     import hyvideo.commons
     import hyvideo.pipelines.hunyuan_video_pipeline
+    from hyvideo.pipelines.hunyuan_video_pipeline import HunyuanVideo_1_5_Pipeline
+    from hyvideo.commons.infer_state import initialize_infer_state
+    import spaces
 except ImportError as e:
     print(f"CRITICAL ERROR: {e}")
     sys.exit(1)
 
 import gradio as gr
 
+# 2. Apply ZeroGPU Monkey Patch
+# We must patch the specific modules where get_gpu_memory is imported/used
 def dummy_get_gpu_memory(device=None):
-    # Return 40GB (in bytes) to trick the config loader into
-    # thinking we have a high-end GPU, allowing it to select
-    # optimal inference params without triggering torch.cuda.init()
-    return 68 * 1024 * 1024 * 1024
+    return 80 * 1024 * 1024 * 1024  # Spoof 80GB
 
-print("🛠️ Applying ZeroGPU Monkey Patch to hyvideo.commons.get_gpu_memory...")
+print("🛠️ Applying ZeroGPU Monkey Patch...")
 hyvideo.commons.get_gpu_memory = dummy_get_gpu_memory
 hyvideo.pipelines.hunyuan_video_pipeline.get_gpu_memory = dummy_get_gpu_memory
 
-# --- Part 3: Model Initialization (Pre-Load) ---
+# --- Part 3: Model Initialization (CPU) ---
 
-# Mock args for inference configuration (required by internal logic)
 class ArgsNamespace:
     def __init__(self):
         self.use_sageattn = False
         self.sage_blocks_range = "0-53"
         self.enable_torch_compile = False
 
-# Initialize internal state mock
 initialize_infer_state(ArgsNamespace())
 
-# Global Pipeline Variable
 pipe = None
 
-# Double check path exists
-if not os.path.isdir(MODEL_DIR):
-    print(f" Error: Model directory not found at {MODEL_DIR}")
-    sys.exit(1)
-
-print(f"⏳ Initializing Pipeline ({TRANSFORMER_VERSION}) from {MODEL_DIR}...")
-
-try:
-    pipe = HunyuanVideo_1_5_Pipeline.create_pipeline(
-        pretrained_model_name_or_path=MODEL_DIR,
-        transformer_version=TRANSFORMER_VERSION,
-        enable_offloading=ENABLE_OFFLOADING,
-        enable_group_offloading=ENABLE_OFFLOADING,
-        transformer_dtype=DTYPE,
-    )
-    print(" Model loaded successfully!")
-except Exception as e:
-    print(f"❌ Failed to load model: {e}")
-    import traceback
-    traceback.print_exc()
-    sys.exit(1)
-
-pipe.to("cuda")
+def pre_load_model():
+    global pipe
+    print(f" Initializing Pipeline ({TRANSFORMER_VERSION})...")
+    try:
+        # Load to CPU explicitly
+        pipe = HunyuanVideo_1_5_Pipeline.create_pipeline(
+            pretrained_model_name_or_path=MODEL_DIR,
+            transformer_version=TRANSFORMER_VERSION,
+            enable_offloading=ENABLE_OFFLOADING,
+            enable_group_offloading=ENABLE_OFFLOADING,
+            transformer_dtype=DTYPE,
+            device=torch.device('cpu')
+        )
+        print("✅ Model loaded into CPU RAM.")
+    except Exception as e:
+        print(f" Failed to load model: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
 
 def save_video_tensor(video_tensor, path, fps=24):
     if isinstance(video_tensor, list): video_tensor = video_tensor[0]
@@ -147,6 +183,8 @@ def save_video_tensor(video_tensor, path, fps=24):
     vid = vid.permute(1, 2, 3, 0).cpu().numpy()
     imageio.mimwrite(path, vid, fps=fps)
 
+# --- Part 4: Inference ---
+
 @spaces.GPU(duration=120)
 def generate(input_image, prompt, length, steps, shift, seed, guidance):
     if pipe is None:
@@ -161,11 +199,17 @@ def generate(input_image, prompt, length, steps, shift, seed, guidance):
     if seed == -1: seed = torch.randint(0, 1000000, (1,)).item()
     generator = torch.Generator(device="cpu").manual_seed(int(seed))
 
-    print(f"Generating: {prompt} | Seed: {seed}")
-
+    print(f"🚀 Moving Pipeline to GPU... (Prompt: {prompt})")
+
     try:
+        # 1. Move Weights
+        pipe.to("cuda")
+
+        # 2. FIX: Manually update internal device reference
+        # (Hunyuan uses this attribute instead of .device in some places)
         pipe.execution_device = torch.device("cuda")
-
+
+        # 3. Run Inference
         output = pipe(
             prompt=prompt,
             height=480, width=854, aspect_ratio="16:9",
@@ -180,28 +224,35 @@ def generate(input_image, prompt, length, steps, shift, seed, guidance):
             enable_sr=False,
             return_dict=True
         )
+
+        # 4. Optional: Move back to CPU?
+        # pipe.to("cpu")
+
     except Exception as e:
+        print(f"Generation Error: {e}")
+        import traceback
+        traceback.print_exc()
         raise gr.Error(f"Inference Failed: {e}")
 
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     os.makedirs("outputs", exist_ok=True)
     output_path = f"outputs/gen_{timestamp}.mp4"
     save_video_tensor(output.videos, output_path)
-
     return output_path
 
-# --- Part 4: UI Definition & Launch ---
+# --- Part 5: UI ---
 
 def create_ui():
     with gr.Blocks(title="HunyuanVideo 1.5 I2V") as demo:
         gr.Markdown(f"### 🎬 HunyuanVideo 1.5 I2V ({TRANSFORMER_VERSION})")
+        gr.Markdown("Running on ZeroGPU. Weights are pre-loaded on CPU.")
 
         with gr.Row():
            with gr.Column():
                 img = gr.Image(label="Reference", type="pil", height=250)
                 prompt = gr.Textbox(label="Prompt", placeholder="Describe motion...", lines=2)
                 with gr.Row():
-                    steps = gr.Slider(2, 20, value=6, step=1, label="Steps")
+                    steps = gr.Slider(2, 50, value=6, step=1, label="Steps")
                     guidance = gr.Slider(1.0, 5.0, value=1.0, step=0.1, label="Guidance")
                 with gr.Row():
                     shift = gr.Slider(1.0, 20.0, value=5.0, step=0.5, label="Shift")
@@ -216,6 +267,6 @@ def create_ui():
     return demo
 
 if __name__ == "__main__":
-    # 2. Launch UI
+    pre_load_model()
     ui = create_ui()
     ui.queue().launch(server_name="0.0.0.0", share=True)
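
The core change in this commit is the ZeroGPU loading pattern: ENABLE_OFFLOADING is set to False, the pipeline is built on CPU at startup (pre_load_model), and the weights are moved to CUDA only inside the @spaces.GPU-decorated handler, where a GPU is actually attached. A minimal sketch of that pattern, with a hypothetical load_model() standing in for HunyuanVideo_1_5_Pipeline.create_pipeline(...):

import torch
import spaces  # Hugging Face ZeroGPU helper; a GPU exists only inside @spaces.GPU calls

def load_model():
    # Hypothetical stand-in for the Hunyuan pipeline: build it once, on CPU, at startup.
    return torch.nn.Linear(8, 8).to(torch.bfloat16)

model = load_model()  # lives in CPU RAM between requests

@spaces.GPU(duration=120)
def generate(x: torch.Tensor) -> torch.Tensor:
    model.to("cuda")                          # move weights onto the just-attached GPU
    y = model(x.to("cuda", torch.bfloat16))   # run on GPU
    return y.cpu()                            # bring results back before the GPU is released

With group offloading disabled, device placement follows this explicit per-request CPU -> GPU cycle instead of being managed inside hyvideo.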
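
A note on the monkey patch: the dummy get_gpu_memory is assigned on both hyvideo.commons and hyvideo.pipelines.hunyuan_video_pipeline because "from module import name" binds a separate reference in the importing module, so patching only the defining module would leave the pipeline calling the original CUDA-touching function. A self-contained sketch with made-up module names (no hyvideo required) showing why both bindings must be replaced:

import sys
import types

# Fake "commons" module whose real get_gpu_memory would initialize CUDA.
commons = types.ModuleType("fake_commons")
def _real_get_gpu_memory(device=None):
    raise RuntimeError("would initialize CUDA here")
commons.get_gpu_memory = _real_get_gpu_memory
sys.modules["fake_commons"] = commons

# Fake "pipeline" module that does `from fake_commons import get_gpu_memory`,
# copying the function reference into its own namespace at import time.
pipeline = types.ModuleType("fake_pipeline")
exec(
    "from fake_commons import get_gpu_memory\n"
    "def pick_config():\n"
    "    return 'high_vram' if get_gpu_memory() > 40 * 1024**3 else 'low_vram'\n",
    pipeline.__dict__,
)

def dummy_get_gpu_memory(device=None):
    return 80 * 1024**3  # spoof 80 GB without touching the GPU

commons.get_gpu_memory = dummy_get_gpu_memory    # patch the defining module...
pipeline.get_gpu_memory = dummy_get_gpu_memory   # ...and the module that imported the name

print(pipeline.pick_config())  # -> high_vram, and no CUDA call is made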
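
The Glyph fallback branch itself notes that renaming model.safetensors to byt5_model.pt may break a plain torch.load, since safetensors is not a pickle file. If the glyph repo ever ships only safetensors, converting the state dict explicitly is a safer route than renaming; a sketch with illustrative paths (in setup_environment these would be src_safe and glyph_ckpt_target), assuming the downstream loader accepts a bare state dict:

import torch
from safetensors.torch import load_file

src_safe = "ckpts/glyph_temp/model.safetensors"                        # illustrative source path
dst_pt = "ckpts/text_encoder/Glyph-SDXL-v2/checkpoints/byt5_model.pt"  # expected target path

state_dict = load_file(src_safe)   # returns an ordinary dict of tensors
torch.save(state_dict, dst_pt)     # torch.load(dst_pt) now behaves like a regular .pt checkpoint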