LimaRaed committed on
Commit
c901cb7
·
verified ·
1 Parent(s): fbd448d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -34
app.py CHANGED
@@ -1,72 +1,90 @@
1
  import gradio as gr
2
  from diffusers import DiffusionPipeline
3
  import torch
4
- import numpy as np
5
  from PIL import Image
6
- import os
 
 
7
 
8
- # Load the model (we'll use caching to improve performance)
 
 
 
 
9
  def load_model():
10
- model_id = "cerspense/zeroscope_v2_576w"
11
- pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
12
- pipe = pipe.to("cuda")
 
 
 
 
13
  return pipe
14
 
15
- # Generate video from text
16
- def generate_video(prompt, num_frames=24, num_inference_steps=50):
17
- # Load model (with caching)
 
18
  if not hasattr(generate_video, "pipe"):
19
  generate_video.pipe = load_model()
20
 
21
- pipe = generate_video.pipe
22
-
23
- # Generate video
24
- video_frames = pipe(prompt, num_frames=num_frames, num_inference_steps=num_inference_steps).frames
25
-
26
- # Convert frames to video file
27
- output_path = "output.mp4"
28
- frame_rate = 8 # frames per second
 
29
 
30
- # Save as GIF (simpler implementation)
31
  gif_path = "output.gif"
 
32
  video_frames[0].save(
33
  gif_path,
34
  save_all=True,
35
  append_images=video_frames[1:],
36
- duration=1000//frame_rate,
37
  loop=0
38
  )
39
 
 
 
40
  return gif_path
41
 
42
- # Gradio interface
43
- with gr.Blocks() as demo:
44
- gr.Markdown("# 🎥 Text-to-Video Generator")
45
- gr.Markdown("Generate short video clips from text prompts using Zeroscope model")
46
 
47
  with gr.Row():
48
  with gr.Column():
49
- prompt = gr.Textbox(label="Enter your prompt", placeholder="A robot dancing on the moon")
50
- frames = gr.Slider(minimum=8, maximum=48, value=24, step=8, label="Number of frames")
51
- steps = gr.Slider(minimum=20, maximum=100, value=50, step=5, label="Inference steps")
52
- submit = gr.Button("Generate Video")
 
53
 
54
  with gr.Column():
55
- output = gr.Image(label="Generated Video", format="gif")
 
56
 
57
  examples = gr.Examples(
58
  examples=[
59
- ["A spaceship flying through a nebula"],
60
- ["A cat wearing sunglasses surfing on a wave"],
61
- ["A futuristic city with flying cars at sunset"]
62
  ],
63
- inputs=prompt
 
64
  )
65
 
66
  submit.click(
67
  fn=generate_video,
68
  inputs=[prompt, frames, steps],
69
- outputs=output
 
70
  )
71
 
72
- demo.launch()
 
import time
import warnings

import gradio as gr
import torch
from diffusers import DiffusionPipeline
from PIL import Image

# Silence library chatter in the demo UI.
# NOTE(review): a blanket filter also hides genuine deprecation
# warnings -- consider narrowing to specific categories.
warnings.filterwarnings("ignore")

# Run entirely on CPU; float32 avoids half-precision instability there.
torch_device = "cpu"
torch_dtype = torch.float32
13
# Load a lightweight text-to-video model.
def load_model(model_id="damo-vilab/text-to-video-ms-1.7b"):
    """Build the text-to-video diffusion pipeline for CPU inference.

    Args:
        model_id: Hugging Face repo to load. Defaults to the
            lightweight ModelScope 1.7B text-to-video checkpoint,
            so existing callers are unaffected.

    Returns:
        A ``DiffusionPipeline`` moved to the module-level
        ``torch_device`` with attention slicing enabled.
    """
    pipe = DiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch_dtype,
    )
    pipe = pipe.to(torch_device)
    # Attention slicing trades a little speed for a much smaller
    # peak-memory footprint -- important on CPU-only hosts.
    pipe.enable_attention_slicing()
    return pipe
23
 
24
def generate_video(prompt, num_frames=8, num_inference_steps=20):
    """Generate a short animated GIF from a text prompt on CPU.

    Args:
        prompt: Text description of the clip to generate.
        num_frames: Requested frame count; clamped to 8 to keep
            CPU runtime tolerable.
        num_inference_steps: Requested denoising steps; clamped to 20.

    Returns:
        Path of the GIF written to the working directory.
    """
    start_time = time.time()

    # Load the pipeline once and cache it on the function object so
    # repeated calls reuse the already-loaded weights.
    if not hasattr(generate_video, "pipe"):
        generate_video.pipe = load_model()

    # Low resolution, few frames, few steps: keeps a CPU-only
    # generation within minutes rather than hours.
    with torch.no_grad():
        video_frames = generate_video.pipe(
            prompt,
            num_frames=min(num_frames, 8),
            num_inference_steps=min(num_inference_steps, 20),
            height=256,
            width=256,
        ).frames

    # Depending on the diffusers version, ``.frames`` may yield numpy
    # arrays rather than PIL images; ``Image.save`` only exists on the
    # latter, so normalize defensively.
    # TODO(review): confirm the frame type against the pinned
    # diffusers release.
    frames = [
        f if isinstance(f, Image.Image) else Image.fromarray(f)
        for f in video_frames
    ]

    # ~3 fps playback; the max() keeps the per-frame duration at or
    # above 100 ms should the rate ever be tuned upward.
    gif_path = "output.gif"
    duration = max(1000 // 3, 100)
    frames[0].save(
        gif_path,
        save_all=True,
        append_images=frames[1:],
        duration=duration,
        loop=0,
    )

    gen_time = time.time() - start_time
    print(f"Generation took {gen_time:.2f} seconds")
    return gif_path
55
 
56
# Gradio interface: prompt + advanced sliders in, GIF preview out.
with gr.Blocks(title="CPU Text-to-Video") as demo:
    gr.Markdown("# 🐢 CPU Text-to-Video Generator")
    gr.Markdown("This version runs entirely on CPU - generations will be slower and lower quality")

    with gr.Row():
        # Left column: user inputs.
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", placeholder="A fish swimming in space")
            # Tucked away so casual users only see the prompt box.
            with gr.Accordion("Advanced Options", open=False):
                frames = gr.Slider(4, 12, value=8, step=4, label="Frames")
                steps = gr.Slider(10, 30, value=20, step=5, label="Steps")
            submit = gr.Button("Generate", variant="primary")

        # Right column: result display plus expectation-setting note.
        with gr.Column():
            output = gr.Image(label="Result", format="gif")
            gr.Markdown("Note: On CPU, generation may take 5-15 minutes")

    # Clickable sample prompts that fill the textbox.
    examples = gr.Examples(
        examples=[
            ["A paper boat floating on water"],
            ["A sloth wearing sunglasses"],
            ["A candle flame in the wind"],
        ],
        inputs=prompt,
        label="Try these examples",
    )

    # Wire the button to the generator; sliders feed the clamped args.
    submit.click(
        fn=generate_video,
        inputs=[prompt, frames, steps],
        outputs=output,
        api_name="generate",
    )

demo.launch(show_api=False)