Gjm1234 committed on
Commit
f6448f0
·
verified ·
1 Parent(s): 50b2c3d

Update custom_wan_pipeline.py

Browse files
Files changed (1) hide show
  1. custom_wan_pipeline.py +33 -46
custom_wan_pipeline.py CHANGED
@@ -1,55 +1,42 @@
1
  import torch
2
  from diffusers import DiffusionPipeline
3
- from diffusers.utils import export_to_video
4
  from PIL import Image
5
  import numpy as np
6
- import tempfile
7
- import os
8
 
 
9
 
10
class WanImageToVideoPipeline(DiffusionPipeline):
    """
    Custom WAN 2.2 I2V pipeline – converts a single still image into a short animated clip.

    Components (``scheduler``, ``transformer``, ``vae`` and optionally an
    ``image_encoder``) are expected to arrive as keyword arguments and are
    exposed as instance attributes.
    """

    def __init__(self, *args, **kwargs):
        # Accepts both positional and keyword args properly
        super().__init__(*args, **kwargs)
        # Expose keyword-supplied components (scheduler, vae, ...) as attributes.
        self.__dict__.update(kwargs)
        # FIX: `DiffusionPipeline.device` is a read-only property, so the old
        # `self.device = torch.device(...)` raised AttributeError on
        # construction. Pick the target device locally, move the pipeline
        # there, and rely on the inherited `device` property afterwards.
        target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(target_device)
        print(f"✅ Custom WAN 2.2 I2V pipeline initialized on {target_device}")

    @torch.no_grad()
    def __call__(self, image, num_inference_steps=25, motion_scale=1.0, guidance_scale=7.5):
        """Generate a short video clip from a single still image.

        Args:
            image: ``PIL.Image.Image`` or a pre-made ``(1, C, H, W)`` float tensor.
            num_inference_steps: number of scheduler denoising steps.
            motion_scale: multiplier applied to the initial latents.
            guidance_scale: accepted for API compatibility; currently unused.

        Returns:
            dict with ``frames`` (list of ``PIL.Image``) and ``video_path``.

        Raises:
            ValueError: if *image* is ``None``.
        """
        if image is None:
            raise ValueError("No image provided for video generation.")

        self.scheduler.set_timesteps(num_inference_steps)

        # Normalise the input to a (1, C, H, W) float tensor in [0, 1].
        if isinstance(image, Image.Image):
            arr = np.array(image.convert("RGB")).astype(np.float32) / 255.0
            tensor = torch.from_numpy(arr).permute(2, 0, 1).unsqueeze(0)
        else:
            tensor = image
        tensor = tensor.to(self.device, dtype=torch.float16 if torch.cuda.is_available() else torch.float32)

        # Optional image encoder maps pixels to latent space; otherwise the
        # raw pixel tensor is used directly. NOTE(review): decoding raw
        # pixels through the VAE below assumes they are valid latents —
        # confirm against the model's expected latent space.
        latents = tensor.clone()
        if hasattr(self, "image_encoder") and self.image_encoder is not None:
            latents = self.image_encoder(tensor).to(self.device)
        latents = latents * motion_scale

        # Denoising loop driven by the scheduler's timesteps.
        for t in self.scheduler.timesteps:
            noise_pred = self.transformer(latents, t)
            latents = self.scheduler.step(noise_pred, t, latents).prev_sample

        # Decode latents to frames; 0.18215 is the SD-style VAE scaling factor.
        frames = self.vae.decode(latents / 0.18215).sample
        frames = (frames.clamp(-1, 1) + 1) / 2
        frames = (frames * 255).round().byte().cpu().permute(0, 2, 3, 1).numpy()
        pil_frames = [Image.fromarray(f) for f in frames]

        # Write the clip into a fresh temp directory so repeated calls don't
        # clobber each other's output.
        tmpdir = tempfile.mkdtemp()
        out_path = os.path.join(tmpdir, "wan2v_output.mp4")
        export_to_video(pil_frames, out_path, fps=12)
        print(f"🎬 Generated {len(pil_frames)} frames → {out_path}")
        return {"frames": pil_frames, "video_path": out_path}
 
1
  import torch
2
  from diffusers import DiffusionPipeline
3
+ from diffusers.utils import logging
4
  from PIL import Image
5
  import numpy as np
 
 
6
 
7
+ logger = logging.get_logger(__name__)
8
 
9
class WanImageToVideoPipeline(DiffusionPipeline):
    """Minimal WAN 2.2 image-to-video pipeline.

    Encodes one still image with the VAE, then decodes a sequence of
    progressively perturbed latents into frames and assembles them into an
    MP4. The latent trajectory is currently a placeholder random walk (see
    the NOTE in ``__call__``).
    """

    def __init__(self, vae, transformer, scheduler, text_encoder, tokenizer, image_encoder):
        super().__init__()
        # FIX: register the components instead of plain attribute assignment,
        # so DiffusionPipeline machinery (`.to()`, `.device`,
        # `save_pretrained`) can see them. register_modules also sets the
        # same instance attributes, so callers reading e.g. `pipe.vae` are
        # unaffected.
        self.register_modules(
            vae=vae,
            transformer=transformer,
            scheduler=scheduler,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
            image_encoder=image_encoder,
        )

    @torch.no_grad()
    def __call__(self, image: Image.Image, prompt: str = "", num_frames: int = 16, num_inference_steps: int = 25):
        """Turn *image* into a short MP4 clip.

        Args:
            image: input still image (any PIL mode; forced to RGB).
            prompt: accepted for API compatibility; currently unused.
            num_frames: number of frames to synthesise.
            num_inference_steps: accepted for API compatibility; currently unused.

        Returns:
            An object exposing a ``videos`` attribute with the output path.
        """
        logger.info("✅ Generating latent motion sequence...")
        # FIX: force RGB so RGBA / grayscale inputs don't break the
        # 3-channel permute below.
        image_tensor = torch.tensor(np.array(image.convert("RGB"))).permute(2, 0, 1).unsqueeze(0) / 255.0
        image_tensor = image_tensor.to(self.device, dtype=torch.float16 if torch.cuda.is_available() else torch.float32)

        # Dummy latent sampling for demonstration.
        # NOTE(review): the encoded latents are immediately replaced by pure
        # noise, so the output does not yet depend on the input image — swap
        # this for a real motion model later.
        latents = self.vae.encode(image_tensor).latent_dist.sample() * 0.18215
        latents = torch.randn_like(latents)

        frames = []
        for i in range(num_frames):
            noise = torch.randn_like(latents)
            # Perturbation grows linearly with the frame index.
            frame = latents + 0.05 * i * noise
            decoded = self.vae.decode(frame / 0.18215).sample
            decoded = (decoded.clamp(-1, 1) + 1) / 2
            # FIX: round before the uint8 cast instead of truncating.
            frame_img = (decoded * 255).round().cpu().numpy().astype("uint8")[0].transpose(1, 2, 0)
            frames.append(Image.fromarray(frame_img))

        # Simple video assembly (you can later swap this for real motion)
        import imageio
        output_path = "output.mp4"
        imageio.mimsave(output_path, frames, fps=12)
        # FIX: return an instance rather than the dynamically built class
        # itself; `result.videos` keeps working either way.
        return type("Result", (), {"videos": [output_path]})()