LimaRaed committed on
Commit
c901cb7
·
verified ·
1 Parent(s): fbd448d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -34
app.py CHANGED
@@ -1,72 +1,90 @@
1
  import gradio as gr
2
  from diffusers import DiffusionPipeline
3
  import torch
4
- import numpy as np
5
  from PIL import Image
6
- import os
 
 
7
 
8
- # Load the model (we'll use caching to improve performance)
 
 
 
 
9
  def load_model():
10
- model_id = "cerspense/zeroscope_v2_576w"
11
- pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
12
- pipe = pipe.to("cuda")
 
 
 
 
13
  return pipe
14
 
15
- # Generate video from text
16
- def generate_video(prompt, num_frames=24, num_inference_steps=50):
17
- # Load model (with caching)
 
18
  if not hasattr(generate_video, "pipe"):
19
  generate_video.pipe = load_model()
20
 
21
- pipe = generate_video.pipe
22
-
23
- # Generate video
24
- video_frames = pipe(prompt, num_frames=num_frames, num_inference_steps=num_inference_steps).frames
25
-
26
- # Convert frames to video file
27
- output_path = "output.mp4"
28
- frame_rate = 8 # frames per second
 
29
 
30
- # Save as GIF (simpler implementation)
31
  gif_path = "output.gif"
 
32
  video_frames[0].save(
33
  gif_path,
34
  save_all=True,
35
  append_images=video_frames[1:],
36
- duration=1000//frame_rate,
37
  loop=0
38
  )
39
 
 
 
40
  return gif_path
41
 
42
- # Gradio interface
43
- with gr.Blocks() as demo:
44
- gr.Markdown("# 🎥 Text-to-Video Generator")
45
- gr.Markdown("Generate short video clips from text prompts using Zeroscope model")
46
 
47
  with gr.Row():
48
  with gr.Column():
49
- prompt = gr.Textbox(label="Enter your prompt", placeholder="A robot dancing on the moon")
50
- frames = gr.Slider(minimum=8, maximum=48, value=24, step=8, label="Number of frames")
51
- steps = gr.Slider(minimum=20, maximum=100, value=50, step=5, label="Inference steps")
52
- submit = gr.Button("Generate Video")
 
53
 
54
  with gr.Column():
55
- output = gr.Image(label="Generated Video", format="gif")
 
56
 
57
  examples = gr.Examples(
58
  examples=[
59
- ["A spaceship flying through a nebula"],
60
- ["A cat wearing sunglasses surfing on a wave"],
61
- ["A futuristic city with flying cars at sunset"]
62
  ],
63
- inputs=prompt
 
64
  )
65
 
66
  submit.click(
67
  fn=generate_video,
68
  inputs=[prompt, frames, steps],
69
- outputs=output
 
70
  )
71
 
72
- demo.launch()
 
import time
import warnings

import gradio as gr
import torch
from diffusers import DiffusionPipeline
from PIL import Image

# Silence library chatter in the demo UI.
# NOTE(review): a blanket filter also hides genuine deprecation
# warnings -- consider narrowing to specific categories.
warnings.filterwarnings("ignore")

# Run entirely on CPU; float32 avoids half-precision instability there.
torch_device = "cpu"
torch_dtype = torch.float32
13
# Load a lightweight text-to-video model.
def load_model(model_id="damo-vilab/text-to-video-ms-1.7b"):
    """Build the text-to-video diffusion pipeline for CPU inference.

    Args:
        model_id: Hugging Face repo to load. Defaults to the
            lightweight ModelScope 1.7B text-to-video checkpoint,
            so existing callers are unaffected.

    Returns:
        A ``DiffusionPipeline`` moved to the module-level
        ``torch_device`` with attention slicing enabled.
    """
    pipe = DiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch_dtype,
    )
    pipe = pipe.to(torch_device)
    # Attention slicing trades a little speed for a much smaller
    # peak-memory footprint -- important on CPU-only hosts.
    pipe.enable_attention_slicing()
    return pipe
23
 
24
def generate_video(prompt, num_frames=8, num_inference_steps=20):
    """Generate a short animated GIF from a text prompt on CPU.

    Args:
        prompt: Text description of the clip to generate.
        num_frames: Requested frame count; clamped to 8 to keep
            CPU runtime tolerable.
        num_inference_steps: Requested denoising steps; clamped to 20.

    Returns:
        Path of the GIF written to the working directory.
    """
    start_time = time.time()

    # Load the pipeline once and cache it on the function object so
    # repeated calls reuse the already-loaded weights.
    if not hasattr(generate_video, "pipe"):
        generate_video.pipe = load_model()

    # Low resolution, few frames, few steps: keeps a CPU-only
    # generation within minutes rather than hours.
    with torch.no_grad():
        video_frames = generate_video.pipe(
            prompt,
            num_frames=min(num_frames, 8),
            num_inference_steps=min(num_inference_steps, 20),
            height=256,
            width=256,
        ).frames

    # Depending on the diffusers version, ``.frames`` may yield numpy
    # arrays rather than PIL images; ``Image.save`` only exists on the
    # latter, so normalize defensively.
    # TODO(review): confirm the frame type against the pinned
    # diffusers release.
    frames = [
        f if isinstance(f, Image.Image) else Image.fromarray(f)
        for f in video_frames
    ]

    # ~3 fps playback; the max() keeps the per-frame duration at or
    # above 100 ms should the rate ever be tuned upward.
    gif_path = "output.gif"
    duration = max(1000 // 3, 100)
    frames[0].save(
        gif_path,
        save_all=True,
        append_images=frames[1:],
        duration=duration,
        loop=0,
    )

    gen_time = time.time() - start_time
    print(f"Generation took {gen_time:.2f} seconds")
    return gif_path
55
 
56
# Gradio interface: prompt + advanced sliders in, GIF preview out.
with gr.Blocks(title="CPU Text-to-Video") as demo:
    gr.Markdown("# 🐢 CPU Text-to-Video Generator")
    gr.Markdown("This version runs entirely on CPU - generations will be slower and lower quality")

    with gr.Row():
        # Left column: user inputs.
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", placeholder="A fish swimming in space")
            # Tucked away so casual users only see the prompt box.
            with gr.Accordion("Advanced Options", open=False):
                frames = gr.Slider(4, 12, value=8, step=4, label="Frames")
                steps = gr.Slider(10, 30, value=20, step=5, label="Steps")
            submit = gr.Button("Generate", variant="primary")

        # Right column: result display plus expectation-setting note.
        with gr.Column():
            output = gr.Image(label="Result", format="gif")
            gr.Markdown("Note: On CPU, generation may take 5-15 minutes")

    # Clickable sample prompts that fill the textbox.
    examples = gr.Examples(
        examples=[
            ["A paper boat floating on water"],
            ["A sloth wearing sunglasses"],
            ["A candle flame in the wind"],
        ],
        inputs=prompt,
        label="Try these examples",
    )

    # Wire the button to the generator; sliders feed the clamped args.
    submit.click(
        fn=generate_video,
        inputs=[prompt, frames, steps],
        outputs=output,
        api_name="generate",
    )

demo.launch(show_api=False)