Transformers
Diffusers
Safetensors
wruisi committed on
Commit
0970deb
·
verified ·
1 Parent(s): b6219b6

Upload example.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. example.py +93 -0
example.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ VBVR-Wan2.2 Image-to-Video Inference Example
4
+
5
+ Generate a video from a reference image using the VBVR-Wan2.2 model.
6
+ Usage:
7
+ python inference.py --model_path /path/to/VBVR-Wan2.2
8
+ """
9
+
10
+ import os
11
+ import torch
12
+ from PIL import Image
13
+ from diffusers import WanImageToVideoPipeline, AutoencoderKLWan
14
+ from diffusers.utils import export_to_video
15
+
16
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Configuration (only change model_path) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
17
+ import argparse
18
+ parser = argparse.ArgumentParser()
19
+ parser.add_argument("--model_path", type=str, default="VBVR-Wan2.2")
20
+ args = parser.parse_args()
21
+ model_path = args.model_path
22
+
23
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
24
+
25
+ # Paths derived from model_path
26
+ image_path = os.path.join(model_path, "assets", "first_frame.png")
27
+ output_path = "output.mp4"
28
+
29
+ # Prompt
30
+ prompt = (
31
+ "The scene contains two types of shapes, each type has three shapes of "
32
+ "different sizes arranged randomly. Keep all shapes unchanged in appearance "
33
+ "(type, size, and color). Only rearrange their positions: first group the "
34
+ "shapes by type, then within each group, sort the shapes from smallest to "
35
+ "largest (left to right), and arrange all shapes in a single horizontal "
36
+ "line from left to right."
37
+ )
38
+ negative_prompt = (
39
+ "่‰ฒ่ฐƒ่‰ณไธฝ๏ผŒ่ฟ‡ๆ›๏ผŒ้™ๆ€๏ผŒ็ป†่Š‚ๆจก็ณŠไธๆธ…๏ผŒๅญ—ๅน•๏ผŒ้ฃŽๆ ผ๏ผŒไฝœๅ“๏ผŒ็”ปไฝœ๏ผŒ็”ป้ข๏ผŒ้™ๆญข๏ผŒ"
40
+ "ๆ•ดไฝ“ๅ‘็ฐ๏ผŒๆœ€ๅทฎ่ดจ้‡๏ผŒไฝŽ่ดจ้‡๏ผŒJPEGๅŽ‹็ผฉๆฎ‹็•™๏ผŒไธ‘้™‹็š„๏ผŒๆฎ‹็ผบ็š„๏ผŒๅคšไฝ™็š„ๆ‰‹ๆŒ‡๏ผŒ"
41
+ "็”ปๅพ—ไธๅฅฝ็š„ๆ‰‹้ƒจ๏ผŒ็”ปๅพ—ไธๅฅฝ็š„่„ธ้ƒจ๏ผŒ็•ธๅฝข็š„๏ผŒๆฏๅฎน็š„๏ผŒๅฝขๆ€็•ธๅฝข็š„่‚ขไฝ“๏ผŒๆ‰‹ๆŒ‡่žๅˆ๏ผŒ"
42
+ "้™ๆญขไธๅŠจ็š„็”ป้ข๏ผŒๆ‚ไนฑ็š„่ƒŒๆ™ฏ๏ผŒไธ‰ๆก่…ฟ๏ผŒ่ƒŒๆ™ฏไบบๅพˆๅคš๏ผŒๅ€’็€่ตฐ"
43
+ )
44
+
45
+ # Generation settings
46
+ num_frames = 96
47
+ num_inference_steps = 50
48
+ guidance_scale = 5.0
49
+ seed = 1
50
+
51
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Load Pipeline โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
52
+
53
+ print(f"Loading model from: {model_path}")
54
+
55
+ vae = AutoencoderKLWan.from_pretrained(
56
+ model_path, subfolder="vae", torch_dtype=torch.float32
57
+ )
58
+
59
+ pipe = WanImageToVideoPipeline.from_pretrained(
60
+ model_path,
61
+ vae=vae,
62
+ torch_dtype=torch.bfloat16,
63
+ )
64
+ pipe.enable_model_cpu_offload()
65
+
66
+ print(f"Pipeline loaded. boundary_ratio = {pipe.config.boundary_ratio}")
67
+
68
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Load Image โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
69
+
70
+ print(f"Loading image: {image_path}")
71
+ image = Image.open(image_path).convert("RGB")
72
+ width, height = image.size
73
+ print(f"Image size: {width}x{height}")
74
+
75
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Generate Video โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
76
+
77
+ print(f"Generating video: {num_frames} frames @ {width}x{height}, {num_inference_steps} steps")
78
+ generator = torch.Generator(device="cpu").manual_seed(seed)
79
+
80
+ output = pipe(
81
+ image=image,
82
+ prompt=prompt,
83
+ negative_prompt=negative_prompt,
84
+ height=height,
85
+ width=width,
86
+ num_frames=num_frames,
87
+ num_inference_steps=num_inference_steps,
88
+ guidance_scale=guidance_scale,
89
+ generator=generator,
90
+ )
91
+
92
+ export_to_video(output.frames[0], output_path, fps=16)
93
+ print(f"Video saved to: {output_path}")