"""CPU-only text-to-video Gradio demo.

Generates a short 256x256 GIF from a text prompt using the ModelScope
text-to-video pipeline (damo-vilab/text-to-video-ms-1.7b) running entirely
on CPU in float32.
"""

import time
import warnings

import gradio as gr
import numpy as np
import torch
from diffusers import DiffusionPipeline
from PIL import Image

warnings.filterwarnings("ignore")

# Force CPU execution; float32 is required because CPU inference has no
# reliable half-precision support.
TORCH_DEVICE = "cpu"
TORCH_DTYPE = torch.float32

# Lazily-initialized pipeline singleton: the (slow) model load happens once,
# on the first generation request, not at import time.
_PIPE = None


def load_model():
    """Load and return the text-to-video pipeline configured for CPU use."""
    model_id = "damo-vilab/text-to-video-ms-1.7b"
    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=TORCH_DTYPE)
    pipe = pipe.to(TORCH_DEVICE)
    # Trades a little speed for a smaller peak-memory footprint -- important
    # on modest CPU hosts.
    pipe.enable_attention_slicing()
    return pipe


def _to_pil_frames(video_frames):
    """Normalize pipeline output frames to a list of PIL images.

    Different diffusers versions return different shapes from
    ``pipeline_output.frames``:

    * ``np.ndarray`` of shape ``(frames, H, W, 3)`` or
      ``(batch, frames, H, W, 3)``;
    * a (possibly nested, per-batch) list of ``np.ndarray`` frames or
      ``PIL.Image`` frames.

    Raises:
        ValueError: if no frames could be extracted.
    """
    # Drop the batch dimension: (1, frames, H, W, 3) -> (frames, H, W, 3).
    if isinstance(video_frames, np.ndarray) and video_frames.ndim == 5:
        video_frames = video_frames[0]
    # Unwrap a per-batch nesting: [[frame, frame, ...]] -> [frame, ...].
    if (
        isinstance(video_frames, (list, tuple))
        and len(video_frames) == 1
        and isinstance(video_frames[0], (list, tuple))
    ):
        video_frames = video_frames[0]

    frames = []
    for frame in video_frames:
        if isinstance(frame, Image.Image):
            frames.append(frame)
            continue
        frame = np.asarray(frame)
        if frame.dtype != np.uint8:
            # Float frames are nominally in [0, 1]; clip before the cast so
            # slight numerical over/undershoot cannot wrap around in uint8.
            frame = (np.clip(frame, 0.0, 1.0) * 255).round().astype(np.uint8)
        frames.append(Image.fromarray(frame))

    if not frames:
        raise ValueError("Unexpected frame format: pipeline returned no frames")
    return frames


def generate_video(prompt, num_frames=8, num_inference_steps=20):
    """Generate an animated GIF from ``prompt`` and return its file path.

    Args:
        prompt: Text description of the desired clip.
        num_frames: Requested frame count (hard-capped at 8 for CPU speed).
        num_inference_steps: Requested denoising steps (hard-capped at 20).

    Returns:
        Path of the written GIF file ("output.gif").
    """
    global _PIPE
    start_time = time.time()
    if _PIPE is None:
        _PIPE = load_model()

    with torch.no_grad():
        output = _PIPE(
            prompt,
            # Caps keep CPU generation time within minutes, not hours.
            num_frames=min(num_frames, 8),
            num_inference_steps=min(num_inference_steps, 20),
            height=256,
            width=256,
        )

    frames = _to_pil_frames(output.frames)

    gif_path = "output.gif"
    frames[0].save(
        gif_path,
        save_all=True,
        append_images=frames[1:],
        duration=100,  # ms per frame -> 10 fps
        loop=0,  # loop forever
    )

    print(f"Generation took {time.time() - start_time:.2f} seconds")
    return gif_path


# Gradio interface -----------------------------------------------------------
with gr.Blocks(title="CPU Text-to-Video") as demo:
    gr.Markdown("# 🐢 CPU Text-to-Video Generator")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt")
            with gr.Accordion("Advanced Options", open=False):
                frames = gr.Slider(4, 12, value=8, step=4, label="Frames")
                steps = gr.Slider(10, 30, value=20, step=5, label="Steps")
            submit = gr.Button("Generate")
        with gr.Column():
            output = gr.Image(label="Result", format="gif")
            gr.Markdown("Note: CPU generation may take several minutes")

    submit.click(
        fn=generate_video,
        inputs=[prompt, frames, steps],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()