# CPU text-to-video Gradio demo (Hugging Face Space)
import gradio as gr
from diffusers import DiffusionPipeline
import torch
import numpy as np
from PIL import Image
import time
import warnings

warnings.filterwarnings("ignore")

# Run everything on CPU: this Space has no GPU, and float32 is the safe
# dtype there (many float16 kernels are unsupported on CPU).
torch_device = "cpu"
torch_dtype = torch.float32
def load_model():
    """Load the ModelScope text-to-video pipeline configured for CPU inference."""
    pipeline = DiffusionPipeline.from_pretrained(
        "damo-vilab/text-to-video-ms-1.7b",
        torch_dtype=torch_dtype,
    )
    pipeline = pipeline.to(torch_device)
    # Attention slicing trades a little speed for a much smaller peak
    # memory footprint -- important on RAM-limited CPU hosts.
    pipeline.enable_attention_slicing()
    return pipeline
def generate_video(prompt, num_frames=8, num_inference_steps=20):
    """Generate a short video for *prompt* and save it as an animated GIF.

    Args:
        prompt: Text description of the desired video.
        num_frames: Requested frame count (hard-capped at 8 for CPU speed).
        num_inference_steps: Requested diffusion steps (hard-capped at 20).

    Returns:
        Path of the GIF written to the working directory ("output.gif").

    Raises:
        ValueError: If the pipeline returns frames in an unrecognized format.
    """
    start_time = time.time()
    # Lazy-load the pipeline once and cache it on the function object so
    # repeated Gradio calls do not reload the (large) model.
    if not hasattr(generate_video, "pipe"):
        generate_video.pipe = load_model()
    with torch.no_grad():
        output = generate_video.pipe(
            prompt,
            num_frames=min(num_frames, 8),  # cap: CPU time grows per frame
            num_inference_steps=min(num_inference_steps, 20),
            height=256,
            width=256,
        )
    frames = _frames_to_pil(output.frames)
    gif_path = "output.gif"
    frames[0].save(
        gif_path,
        save_all=True,
        append_images=frames[1:],
        duration=100,  # ms per frame -> 10 fps
        loop=0,        # loop forever
        quality=80,
    )
    print(f"Generation took {time.time() - start_time:.2f} seconds")
    return gif_path


def _frames_to_pil(video_frames):
    """Normalize pipeline frame output to a list of PIL images.

    Depending on the diffusers version, ``output.frames`` may be a 4D/5D
    float ndarray, a one-element batch list wrapping an ndarray or a list
    of PIL images, or a flat list of PIL images.  The original code only
    accepted the ndarray form and raised on current diffusers releases.
    """
    # Unwrap a batch wrapper such as [ndarray(frames, h, w, 3)] or [[PIL, ...]].
    if (
        isinstance(video_frames, list)
        and len(video_frames) == 1
        and isinstance(video_frames[0], (list, np.ndarray))
    ):
        video_frames = video_frames[0]
    if isinstance(video_frames, np.ndarray):
        if video_frames.ndim == 5:
            video_frames = video_frames[0]  # drop batch dimension
        frames = []
        for frame in video_frames:
            # Clip before scaling: diffusion output can stray slightly
            # outside [0, 1]; without the clip, uint8 conversion wraps.
            frame = (np.clip(frame, 0.0, 1.0) * 255).astype(np.uint8)
            frames.append(Image.fromarray(frame))
        return frames
    if isinstance(video_frames, list) and video_frames and all(
        isinstance(f, Image.Image) for f in video_frames
    ):
        return list(video_frames)
    raise ValueError("Unexpected frame format")
# Gradio interface: prompt box and advanced sliders on the left, the
# resulting GIF on the right.
with gr.Blocks(title="CPU Text-to-Video") as demo:
    gr.Markdown("# 🐢 CPU Text-to-Video Generator")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt")
            with gr.Accordion("Advanced Options", open=False):
                # Ranges match the hard caps inside generate_video
                # (8 frames / 20 steps).  The previous maxima of 12 and 30
                # were silently clamped, so larger selections had no effect.
                frames = gr.Slider(4, 8, value=8, step=4, label="Frames")
                steps = gr.Slider(10, 20, value=20, step=5, label="Steps")
            submit = gr.Button("Generate")
        with gr.Column():
            output = gr.Image(label="Result", format="gif")
            gr.Markdown("Note: CPU generation may take several minutes")
    submit.click(
        fn=generate_video,
        inputs=[prompt, frames, steps],
        outputs=output,
    )

demo.launch()