import os
import subprocess
import sys
import time
import tempfile
import zipfile
import torch
# ---------------------------------------------------------------------------
# Install private diffusers fork
# ---------------------------------------------------------------------------
# The Helios pipeline classes live in a custom diffusers build shipped as a
# zip next to this app file; extract and install it before importing diffusers.
_APP_DIR = os.path.dirname(os.path.abspath(__file__))
ZIP_PATH = os.path.join(_APP_DIR, "helios_diffusers.zip")
EXTRACT_DIR = os.path.join(_APP_DIR, "_helios_diffusers")
# Top-level folder inside the zip (GitHub "<repo>-<branch>" archive layout).
_PKG_ROOT = os.path.join(EXTRACT_DIR, "diffusers-new-model-addition-helios-helios")
if not os.path.isdir(_PKG_ROOT):
    # Extract only once; the extracted directory doubles as a "done" marker.
    print(f"[setup] Extracting {ZIP_PATH}")
    with zipfile.ZipFile(ZIP_PATH, "r") as zf:
        zf.extractall(EXTRACT_DIR)
# NOTE(review): install runs on every startup (pip is a fast no-op when the
# package is already present) so the sys.path fallback is re-applied after a
# restart — confirm this matches the original (indentation was lost upstream).
print(f"[setup] Installing diffusers from {_PKG_ROOT}")
try:
    subprocess.check_call([sys.executable, "-m", "pip", "install", _PKG_ROOT])
except subprocess.CalledProcessError as e:
    # Fallback for environments where pip cannot install (e.g. read-only
    # site-packages): import the fork straight from its source tree instead.
    print(f"[setup] pip install failed (exit {e.returncode}), falling back to sys.path")
    _SRC_DIR = os.path.join(_PKG_ROOT, "src")
    if os.path.isdir(_SRC_DIR):
        sys.path.insert(0, _SRC_DIR)
import gradio as gr
import spaces
from diffusers import (
AutoencoderKLWan,
HeliosPyramidPipeline,
HeliosDMDScheduler
)
from diffusers.utils import export_to_video, load_image, load_video
from aoti import aoti_load_
# ---------------------------------------------------------------------------
# Pre-load model
# ---------------------------------------------------------------------------
MODEL_ID = "BestWishYsh/Helios-Distilled"
# VAE is kept in float32 (decode quality), while the rest of the pipeline
# runs in bfloat16.
vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
scheduler = HeliosDMDScheduler.from_pretrained(MODEL_ID, subfolder="scheduler")
pipe = HeliosPyramidPipeline.from_pretrained(
    MODEL_ID,
    vae=vae,
    scheduler=scheduler,
    torch_dtype=torch.bfloat16,
    is_distilled=True
)
# Optional AoT-compiled transformer weights (currently disabled).
# aoti_load_(pipe.transformer, "multimodalart/helios-distilled-transformer", "helios_distilled_transformer.pt2")
pipe.to("cuda")
# Use the FlashAttention-3 hub backend for the transformer's attention.
pipe.transformer.set_attention_backend("_flash_3_hub")
# ---------------------------------------------------------------------------
# Generation
# ---------------------------------------------------------------------------
@spaces.GPU(duration=300)
def generate_video(
    mode: str,
    prompt: str,
    image_input,
    video_input,
    height: int,
    width: int,
    num_frames: int,
    num_inference_steps: int,
    seed: int,
    is_amplify_first_chunk: bool,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the Helios pipeline and return the generated video.

    Args:
        mode: "Text-to-Video", "Image-to-Video" or "Video-to-Video".
        prompt: Text prompt; required in every mode.
        image_input: Optional image filepath, used only in I2V mode.
        video_input: Optional video filepath, used only in V2V mode.
        height, width, num_frames: Output geometry.
        num_inference_steps: Denoising steps applied at each pyramid stage.
        seed: RNG seed for reproducible sampling.
        is_amplify_first_chunk: Forwarded verbatim to the pipeline.
        progress: Gradio progress tracker mirroring the pipeline's tqdm.

    Returns:
        Tuple of (path to mp4 file, human-readable info string).

    Raises:
        gr.Error: If no prompt was provided.
    """
    if not prompt:
        raise gr.Error("Please provide a prompt.")
    generator = torch.Generator(device="cuda").manual_seed(int(seed))
    kwargs = {
        "prompt": prompt,
        "height": int(height),
        "width": int(width),
        "num_frames": int(num_frames),
        # Distilled model: classifier-free guidance effectively disabled.
        "guidance_scale": 1.0,
        "generator": generator,
        "output_type": "np",
        # Same step count for each of the three pyramid stages.
        "pyramid_num_inference_steps_list": [
            int(num_inference_steps),
            int(num_inference_steps),
            int(num_inference_steps),
        ],
        "is_amplify_first_chunk": is_amplify_first_chunk,
    }
    # Conditioning is optional: when the matching media is missing we fall
    # back to plain text-to-video rather than erroring out.
    if mode == "Image-to-Video" and image_input is not None:
        img = load_image(image_input).resize((int(width), int(height)))
        kwargs["image"] = img
    elif mode == "Video-to-Video" and video_input is not None:
        kwargs["video"] = load_video(video_input)
    t0 = time.time()
    output = pipe(**kwargs).frames[0]
    elapsed = time.time() - t0
    # delete=False so Gradio can serve the file after this function returns.
    tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    # Fix: close the handle immediately — export_to_video reopens the path
    # itself, and the original leaked one file descriptor per generation.
    tmp.close()
    export_to_video(output, tmp.name, fps=24)
    info = f"Generated in {elapsed:.1f}s · {num_frames} frames · {height}×{width}"
    return tmp.name, info
# ---------------------------------------------------------------------------
# UI Setup
# ---------------------------------------------------------------------------
def update_conditional_visibility(mode):
    """Show the conditional media input matching *mode*, hide the other.

    Returns a pair of gr.update objects for (image_input, video_input);
    in Text-to-Video mode both are hidden.
    """
    show_image = mode == "Image-to-Video"
    show_video = mode == "Video-to-Video"
    return gr.update(visible=show_image), gr.update(visible=show_video)
# Page styles: centered header and a capped content width.
CSS = """
#header { text-align: center; margin-bottom: 0.5em; }
#header h1 { font-size: 2.2em; margin-bottom: 0; }
.contain { max-width: 1350px; margin: 0 auto !important; }
"""
# UI layout: inputs in the left column, generated video in the right column.
with gr.Blocks(css=CSS, title="Helios Video Generation", theme=gr.themes.Soft()) as demo:
    gr.HTML(
        """
        <div id="header">
            <h1>🎬 Helios 14B distilled</h1>
        </div>
        """
    )
    with gr.Row():
        with gr.Column(scale=1):
            # The selected mode decides which conditional input is visible.
            mode = gr.Radio(
                choices=["Text-to-Video", "Image-to-Video", "Video-to-Video"],
                value="Text-to-Video",
                label="Generation Mode",
            )
            # Hidden by default; toggled via update_conditional_visibility().
            image_input = gr.Image(label="Image (for I2V)", type="filepath", visible=False)
            video_input = gr.Video(label="Video (for V2V)", visible=False)
            prompt = gr.Textbox(
                label="Prompt",
                lines=4,
                value=(
                    "A vibrant tropical fish swimming gracefully among colorful coral reefs in "
                    "a clear, turquoise ocean. The fish has bright blue and yellow scales with a "
                    "small, distinctive orange spot on its side, its fins moving fluidly. The coral "
                    "reefs are alive with a variety of marine life, including small schools of "
                    "colorful fish and sea turtles gliding by. The water is crystal clear, allowing "
                    "for a view of the sandy ocean floor below. The reef itself is adorned with a mix "
                    "of hard and soft corals in shades of red, orange, and green. The photo captures "
                    "the fish from a slightly elevated angle, emphasizing its lively movements and the "
                    "vivid colors of its surroundings. A close-up shot with dynamic movement."
                )
            )
            with gr.Accordion("Advanced Settings", open=False):
                with gr.Row():
                    # Resolution is fixed (non-interactive) in this demo.
                    height = gr.Number(value=384, label="Height", precision=0, interactive=False)
                    width = gr.Number(value=640, label="Width", precision=0, interactive=False)
                with gr.Row():
                    # Frame count moves in increments of 33 — presumably one
                    # pyramid chunk per increment; confirm against the pipeline.
                    num_frames = gr.Slider(33, 231, value=231, step=33, label="Num Frames")
                    num_inference_steps = gr.Slider(1, 10, value=2, step=1, label="Steps per stage")
                with gr.Row():
                    seed = gr.Number(value=42, label="Seed", precision=0)
                    is_amplify_first_chunk = gr.Checkbox(label="Amplify First Chunk", value=True)
            generate_btn = gr.Button("🚀 Generate Video", variant="primary", size="lg")
        with gr.Column(scale=1):
            video_output = gr.Video(label="Generated Video", autoplay=True)
            info_output = gr.Textbox(label="Info", interactive=False)
    # Event wiring: mode toggles the conditional inputs; the button runs generation.
    mode.change(fn=update_conditional_visibility, inputs=[mode], outputs=[image_input, video_input])
    generate_btn.click(
        fn=generate_video,
        inputs=[mode, prompt, image_input, video_input, height, width, num_frames, num_inference_steps, seed, is_amplify_first_chunk],
        outputs=[video_output, info_output],
    )
    # Clickable example prompts (text-to-video only; they fill mode + prompt).
    gr.Examples(
        examples=[
            [
                "Text-to-Video",
                "A vibrant tropical fish swimming gracefully among colorful coral reefs in "
                "a clear, turquoise ocean. The fish has bright blue and yellow scales with a "
                "small, distinctive orange spot on its side, its fins moving fluidly. The coral "
                "reefs are alive with a variety of marine life, including small schools of "
                "colorful fish and sea turtles gliding by. The water is crystal clear, allowing "
                "for a view of the sandy ocean floor below. The reef itself is adorned with a mix "
                "of hard and soft corals in shades of red, orange, and green. The photo captures "
                "the fish from a slightly elevated angle, emphasizing its lively movements and the "
                "vivid colors of its surroundings. A close-up shot with dynamic movement.",
            ],
            [
                "Text-to-Video",
                "An extreme close-up of an gray-haired man with a beard in his 60s, he is deep in "
                "thought pondering the history of the universe as he sits at a cafe in Paris, his eyes "
                "focus on people offscreen as they walk as he sits mostly motionless, he is dressed in "
                "a wool coat suit coat with a button-down shirt , he wears a brown beret and glasses "
                "and has a very professorial appearance, and the end he offers a subtle closed-mouth "
                "smile as if he found the answer to the mystery of life, the lighting is very cinematic "
                "with the golden light and the Parisian streets and city in the background, depth of "
                "field, cinematic 35mm film.",
            ],
            [
                "Text-to-Video",
                "A drone camera circles around a beautiful historic church built on a rocky outcropping "
                "along the Amalfi Coast, the view showcases historic and magnificent architectural "
                "details and tiered pathways and patios, waves are seen crashing against the rocks "
                "below as the view overlooks the horizon of the coastal waters and hilly landscapes "
                "of the Amalfi Coast Italy, several distant people are seen walking and enjoying vistas "
                "on patios of the dramatic ocean views, the warm glow of the afternoon sun creates a "
                "magical and romantic feeling to the scene, the view is stunning captured with beautiful photography.",
            ],
        ],
        inputs=[mode, prompt],
        label="Example Prompts",
    )
# Entry point: launch the Gradio server when run as a script.
# Fix: dropped the stray trailing "|" scrape residue after demo.launch().
if __name__ == "__main__":
    demo.launch()