import os
import random
import tempfile

import gradio as gr
import imageio
import numpy as np
import torch
from diffusers import DiffusionPipeline
|
|
# Pick the execution target once: half precision on a CUDA GPU when one is
# available, otherwise full precision on the CPU.
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# Load the text-to-video pipeline at import time and move it to the chosen
# device so every request reuses the same instance.
pipe = DiffusionPipeline.from_pretrained(
    "stepfun-ai/stepvideo-t2v",
    torch_dtype=torch_dtype,
).to(device)

# Largest seed value offered in the UI and drawn when randomizing.
MAX_SEED = np.iinfo(np.int32).max
|
|
def infer(prompt, seed, randomize_seed, num_inference_steps):
    """Generate a video for ``prompt`` and return its path and the seed used.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        seed: Seed for the random generator; replaced by a random value when
            ``randomize_seed`` is true.
        randomize_seed: When true, draw a fresh seed from ``[0, MAX_SEED]``.
        num_inference_steps: Step count forwarded to the pipeline as
            ``num_inference_steps``.

    Returns:
        A ``(video_path, seed)`` tuple: the path of the MP4 file written for
        this call and the integer seed that was actually used.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # UI sliders may deliver floats; the seed and the step count must both be
    # integers before they reach torch and the pipeline.
    seed = int(seed)
    num_inference_steps = int(num_inference_steps)

    # Use a private generator instead of torch.manual_seed(): the latter
    # reseeds the process-wide RNG, so overlapping requests would interfere
    # with each other's random streams.
    generator = torch.Generator().manual_seed(seed)

    output = pipe(
        prompt=prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
    )
    frames = output.frames[0]

    # Reserve a unique file per call; a fixed path would let a later request
    # overwrite a video that an earlier request is still serving.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name
    imageio.mimsave(video_path, frames, fps=8)

    return video_path, seed
|
|
# Sample prompts shown below the input box; clicking one fills the prompt field.
examples = [
    "Astronaut dancing on Mars, cinematic lighting",
    "A cat flying through the city on a skateboard",
    "Robot chef cooking in a futuristic kitchen",
]
|
|
# Build the Gradio UI: a prompt box with a generate button, the resulting
# video, and a collapsible panel with the sampling controls.
with gr.Blocks() as demo:
    gr.Markdown("# Text-to-Video with `stepvideo-t2v`")

    with gr.Row():
        prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here")
        run_btn = gr.Button("Generate Video")

    with gr.Row():
        video_output = gr.Video(label="Generated Video")

    with gr.Accordion("Advanced Settings", open=False):
        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
        # step=1 keeps the value integral; without an explicit step Gradio
        # derives a fractional one from the range, and the pipeline needs a
        # whole number of steps.
        num_inference_steps = gr.Slider(
            label="Inference Steps",
            minimum=1,
            maximum=50,
            step=1,
            value=25,
        )

    gr.Examples(examples=examples, inputs=[prompt])

    # The seed slider is also an output so the UI shows the seed that was
    # actually used when "Randomize seed" is ticked.
    run_btn.click(
        fn=infer,
        inputs=[prompt, seed, randomize_seed, num_inference_steps],
        outputs=[video_output, seed],
    )
|
|
# Start the web server only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch()