multimodalart HF Staff committed on
Commit
7b20338
·
verified ·
1 Parent(s): b3d4063

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -38,6 +38,7 @@ import torch
38
  from diffusers import (
39
  AutoencoderKLWan,
40
  HeliosPyramidPipeline,
 
41
  )
42
  from diffusers.utils import export_to_video, load_image, load_video
43
 
@@ -47,8 +48,16 @@ from diffusers.utils import export_to_video, load_image, load_video
47
  MODEL_ID = "BestWishYsh/Helios-Distilled"
48
 
49
  vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
50
- pipe = HeliosPyramidPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
 
 
 
 
 
 
 
51
  pipe.to("cuda")
 
52
 
53
  DEFAULT_NEGATIVE_PROMPT = (
54
  "Bright tones, overexposed, static, blurred details, subtitles, style, works, "
@@ -66,7 +75,6 @@ DEFAULT_NEGATIVE_PROMPT = (
66
  def generate_video(
67
  mode: str,
68
  prompt: str,
69
- negative_prompt: str,
70
  image_input,
71
  video_input,
72
  height: int,
@@ -85,7 +93,6 @@ def generate_video(
85
 
86
  kwargs = {
87
  "prompt": prompt,
88
- "negative_prompt": negative_prompt,
89
  "height": int(height),
90
  "width": int(width),
91
  "num_frames": int(num_frames),
@@ -157,11 +164,6 @@ with gr.Blocks(css=CSS, title="Helios Video Generation", theme=gr.themes.Soft())
157
  "Medium shot focusing on the train window and the rushing scenery beyond."
158
  ),
159
  )
160
- negative_prompt = gr.Textbox(
161
- label="Negative Prompt",
162
- lines=2,
163
- value=DEFAULT_NEGATIVE_PROMPT,
164
- )
165
 
166
  with gr.Accordion("Conditional Inputs (I2V / V2V)", open=False):
167
  image_input = gr.Image(label="Image (for I2V)", type="filepath")
@@ -172,7 +174,7 @@ with gr.Blocks(css=CSS, title="Helios Video Generation", theme=gr.themes.Soft())
172
  height = gr.Slider(128, 768, value=384, step=16, label="Height")
173
  width = gr.Slider(128, 1280, value=640, step=16, label="Width")
174
  with gr.Row():
175
- num_frames = gr.Slider(9, 240, value=99, step=1, label="Num Frames")
176
  num_inference_steps = gr.Slider(
177
  1, 10, value=2, step=1, label="Steps (per pyramid stage)"
178
  )
@@ -195,7 +197,6 @@ with gr.Blocks(css=CSS, title="Helios Video Generation", theme=gr.themes.Soft())
195
  inputs=[
196
  mode,
197
  prompt,
198
- negative_prompt,
199
  image_input,
200
  video_input,
201
  height,
 
38
  from diffusers import (
39
  AutoencoderKLWan,
40
  HeliosPyramidPipeline,
41
+ HeliosDMDScheduler
42
  )
43
  from diffusers.utils import export_to_video, load_image, load_video
44
 
 
48
  MODEL_ID = "BestWishYsh/Helios-Distilled"
49
 
50
  vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
51
+ scheduler = HeliosDMDScheduler.from_pretrained(MODEL_ID, subfolder="scheduler")
52
+ pipe = HeliosPyramidPipeline.from_pretrained(
53
+ MODEL_ID,
54
+ vae=vae,
55
+ scheduler=scheduler,
56
+ torch_dtype=torch.bfloat16,
57
+ is_distilled=True
58
+ )
59
  pipe.to("cuda")
60
+ pipe.transformer.set_attention_backend("_flash_3_hub")
61
 
62
  DEFAULT_NEGATIVE_PROMPT = (
63
  "Bright tones, overexposed, static, blurred details, subtitles, style, works, "
 
75
  def generate_video(
76
  mode: str,
77
  prompt: str,
 
78
  image_input,
79
  video_input,
80
  height: int,
 
93
 
94
  kwargs = {
95
  "prompt": prompt,
 
96
  "height": int(height),
97
  "width": int(width),
98
  "num_frames": int(num_frames),
 
164
  "Medium shot focusing on the train window and the rushing scenery beyond."
165
  ),
166
  )
 
 
 
 
 
167
 
168
  with gr.Accordion("Conditional Inputs (I2V / V2V)", open=False):
169
  image_input = gr.Image(label="Image (for I2V)", type="filepath")
 
174
  height = gr.Slider(128, 768, value=384, step=16, label="Height")
175
  width = gr.Slider(128, 1280, value=640, step=16, label="Width")
176
  with gr.Row():
177
+ num_frames = gr.Slider(9, 240, value=240, step=1, label="Num Frames")
178
  num_inference_steps = gr.Slider(
179
  1, 10, value=2, step=1, label="Steps (per pyramid stage)"
180
  )
 
197
  inputs=[
198
  mode,
199
  prompt,
 
200
  image_input,
201
  video_input,
202
  height,