# DS_TextToVideo / app.py
# Hosting-page residue captured with the file (not code): "LimaRaed's picture";
# commit "Update app.py" (2f7fdab, verified).
import gradio as gr
from diffusers import DiffusionPipeline
import torch
import numpy as np
from PIL import Image
import time
import warnings
# Install a blanket "ignore" filter: every Python warning is suppressed from here on.
warnings.filterwarnings("ignore")
# Set to use CPU
# Device string and tensor dtype consumed by load_model() when it builds the
# pipeline: CPU execution with float32 weights.
torch_device = "cpu"
torch_dtype = torch.float32
def load_model():
    """Create the text-to-video diffusion pipeline and return it ready for use.

    The pipeline is loaded from the "damo-vilab/text-to-video-ms-1.7b"
    checkpoint with the module-level ``torch_dtype``, moved to the module-level
    ``torch_device``, and has attention slicing switched on.

    Returns:
        The configured ``DiffusionPipeline`` instance.
    """
    pipeline = DiffusionPipeline.from_pretrained(
        "damo-vilab/text-to-video-ms-1.7b",
        torch_dtype=torch_dtype,
    ).to(torch_device)
    # Attention slicing is a diffusers memory-saving option.
    pipeline.enable_attention_slicing()
    return pipeline
# Upper bounds applied to the requested settings, whatever the caller asks for.
_MAX_FRAMES = 8
_MAX_STEPS = 20


def _to_pil_frame(frame):
    """Turn one decoded frame into a PIL image.

    PIL images pass through unchanged. Floating-point arrays are treated as
    values in [0, 1] and scaled to 8-bit; other non-uint8 arrays are clipped to
    [0, 255]; uint8 arrays are used as-is.
    """
    if isinstance(frame, Image.Image):
        return frame
    pixels = np.asarray(frame)
    if np.issubdtype(pixels.dtype, np.floating):
        # Clip first so values slightly outside [0, 1] saturate instead of
        # wrapping around in the uint8 cast.
        pixels = (np.clip(pixels, 0.0, 1.0) * 255).astype(np.uint8)
    elif pixels.dtype != np.uint8:
        pixels = np.clip(pixels, 0, 255).astype(np.uint8)
    return Image.fromarray(pixels)


def _frames_to_images(video_frames):
    """Convert the pipeline's ``frames`` output into a non-empty list of PIL images.

    Accepts a 5-D array ``(batch, frames, H, W, C)``, a 4-D array
    ``(frames, H, W, C)``, or a list/tuple of frames, optionally nested one
    level as a batch of videos. Only the first video of a batch is used.

    Raises:
        ValueError: if the container type or array rank is not one of the
            above, or if there are no frames.
    """
    if isinstance(video_frames, np.ndarray):
        if video_frames.ndim not in (4, 5):
            raise ValueError("Unexpected frame format")
    elif not isinstance(video_frames, (list, tuple)):
        raise ValueError("Unexpected frame format")

    if len(video_frames) > 0:
        first = video_frames[0]
        # A nested sequence or a 4-D first element means the outer level is the
        # batch dimension: keep only the first video.
        if isinstance(first, (list, tuple)) or getattr(first, "ndim", 0) == 4:
            video_frames = first

    images = [_to_pil_frame(frame) for frame in video_frames]
    if not images:
        raise ValueError("Pipeline returned no frames")
    return images


def generate_video(prompt, num_frames=8, num_inference_steps=20):
    """Generate a short animated GIF from a text prompt and return its path.

    The pipeline is created on first use via ``load_model()`` and cached on the
    function object (``generate_video.pipe``) so later calls reuse it.

    Args:
        prompt: Text description passed to the pipeline.
        num_frames: Requested number of frames; coerced to ``int`` and clamped
            to the range 1..8.
        num_inference_steps: Requested number of denoising steps; coerced to
            ``int`` and clamped to the range 1..20.

    Returns:
        The path of the written GIF file (``"output.gif"``, overwritten on
        every call).

    Raises:
        ValueError: if the pipeline output is in an unsupported format or
            contains no frames.
    """
    start_time = time.time()

    if not hasattr(generate_video, "pipe"):
        generate_video.pipe = load_model()

    # Callers (e.g. UI sliders) may pass floats; these values are used as
    # counts, so coerce to int and keep them within [1, cap].
    frame_count = max(1, min(int(num_frames), _MAX_FRAMES))
    step_count = max(1, min(int(num_inference_steps), _MAX_STEPS))

    with torch.no_grad():
        output = generate_video.pipe(
            prompt,
            num_frames=frame_count,
            num_inference_steps=step_count,
            height=256,
            width=256,
        )

    frames = _frames_to_images(output.frames)

    # Create GIF. Pillow's GIF writer takes no `quality` option, so none is passed.
    gif_path = "output.gif"
    frames[0].save(
        gif_path,
        save_all=True,
        append_images=frames[1:],
        duration=100,  # 100ms per frame
        loop=0,
    )

    print(f"Generation took {time.time() - start_time:.2f} seconds")
    return gif_path
# Gradio Interface
# Two-column layout: prompt and advanced options on the left, the generated
# GIF on the right.
# NOTE(review): the nesting below is reconstructed from a flattened source;
# confirm the placement of the button and the note against the deployed app.
with gr.Blocks(title="CPU Text-to-Video") as demo:
    gr.Markdown("# 🐢 CPU Text-to-Video Generator")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt")
            with gr.Accordion("Advanced Options", open=False):
                # Slider maxima match the caps applied inside generate_video
                # (8 frames, 20 steps), so every selectable value has an effect.
                frames = gr.Slider(4, 8, value=8, step=4, label="Frames")
                steps = gr.Slider(10, 20, value=20, step=5, label="Steps")
            submit = gr.Button("Generate")
        with gr.Column():
            output = gr.Image(label="Result", format="gif")
            gr.Markdown("Note: CPU generation may take several minutes")
    # Clicking the button runs generate_video(prompt, frames, steps) and shows
    # the returned GIF path in the image component.
    submit.click(
        fn=generate_video,
        inputs=[prompt, frames, steps],
        outputs=output,
    )
demo.launch()