orderlymirror committed on
Commit
3704245
·
verified ·
1 Parent(s): 6fe1232

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -48
app.py CHANGED
@@ -4,27 +4,46 @@ import gradio as gr
4
  from diffusers import CogVideoXPipeline
5
  from diffusers.utils import export_to_video
6
 
7
- # 1. Load and optimize the CogVideoX pipeline with CPU offloading only
 
 
8
  pipe = CogVideoXPipeline.from_pretrained(
9
  "THUDM/CogVideoX1.5-5B",
10
  torch_dtype=torch.bfloat16
11
  )
12
- pipe.enable_model_cpu_offload()
13
- pipe.vae.enable_slicing()
14
 
15
- # 2. GPU-decorated generation function
16
- @spaces.GPU(duration=180)
17
- def generate_video(prompt: str, steps: int, frames: int, fps: int, resolution: str) -> str:
 
 
 
 
18
  """
19
- Generates video from text with adjustable steps, frames, fps, and resolution.
20
- Returns path to the saved MP4 file.
21
  """
22
- # Parse resolution string (e.g., "720p" -> height=720)
23
- height = int(resolution.rstrip('p'))
24
- # Width is computed to maintain the model's aspect ratio (assumed 1360x768 -> 16:9)
25
- width = int(height * (16/9))
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- # Run pipeline; offload handles device placement
28
  output = pipe(
29
  prompt=prompt,
30
  num_inference_steps=steps,
@@ -32,53 +51,63 @@ def generate_video(prompt: str, steps: int, frames: int, fps: int, resolution: s
32
  height=height,
33
  width=width
34
  )
35
- frame_list = output.frames[0]
36
 
37
- # Export to MP4 for browser playback
38
- return export_to_video(frame_list, "generated.mp4", fps=fps)
 
39
 
40
- # 3. Build the Gradio interface with sliders and resolution dropdown
41
- with gr.Blocks(title="CogVideoX Interactive Text-to-Video") as demo:
 
 
42
  gr.Markdown(
43
  """
44
- # 🎞️ Interactive Text‑to‑Video Demo
45
- Adjust the sliders and select resolution to control the diffusion steps,
46
- total frames (length), fps, and video resolution.
47
  """
48
  )
49
- with gr.Column():
50
- prompt_input = gr.Textbox(
51
- label="Prompt",
52
- placeholder="A serene forest at dawn",
53
- lines=2
54
- )
55
- steps_slider = gr.Slider(
56
- minimum=1, maximum=100, step=1, value=25,
57
- label="Inference Steps"
58
- )
59
- frames_slider = gr.Slider(
60
- minimum=16, maximum=320, step=1, value=161,
61
- label="Total Frames (approx. length)"
62
- )
63
- fps_slider = gr.Slider(
64
- minimum=1, maximum=60, step=1, value=16,
65
- label="Frames per Second (fps)"
66
- )
67
- resolution_dropdown = gr.Dropdown(
68
- choices=["360p", "480p", "720p", "1080p"],
69
- value="480p",
70
- label="Resolution"
71
- )
72
- gen_button = gr.Button("Generate Video")
73
- video_output = gr.Video(label="Generated Video", format="mp4")
 
 
 
 
 
74
 
75
  gen_button.click(
76
  fn=generate_video,
77
- inputs=[prompt_input, steps_slider, frames_slider, fps_slider, resolution_dropdown],
78
  outputs=video_output
79
  )
80
 
81
- # 4. Launch the app with SSR disabled
 
 
82
  if __name__ == "__main__":
83
  demo.launch(
84
  server_name="0.0.0.0",
 
4
  from diffusers import CogVideoXPipeline
5
  from diffusers.utils import export_to_video
6
 
7
+ # ────────────────────────────────────────────────────────────
8
+ # 1. Load & optimize the CogVideoX pipeline with CPU offload
9
+ # ────────────────────────────────────────────────────────────
10
  pipe = CogVideoXPipeline.from_pretrained(
11
  "THUDM/CogVideoX1.5-5B",
12
  torch_dtype=torch.bfloat16
13
  )
14
+ pipe.enable_model_cpu_offload() # auto move submodules between CPU/GPU
15
+ pipe.vae.enable_slicing() # slice VAE for extra VRAM savings
16
 
17
+ # ────────────────────────────────────────────────────────────
18
+ # 2. Resolution parsing & sanitization
19
+ # ────────────────────────────────────────────────────────────
20
+ def make_divisible_by_8(x: int) -> int:
21
+ return (x // 8) * 8
22
+
23
+ def parse_resolution(res_str: str):
24
  """
25
+ Convert strings like "480p" into (height, width) both divisible by 8
26
+ while preserving ~16:9 aspect ratio.
27
  """
28
+ h = int(res_str.rstrip("p"))
29
+ w = int(h * 16 / 9)
30
+ return make_divisible_by_8(h), make_divisible_by_8(w)
31
+
32
+ # ────────────────────────────────────────────────────────────
33
+ # 3. GPU‑decorated video generation function
34
+ # ────────────────────────────────────────────────────────────
35
+ @spaces.GPU(duration=600) # allow up to 10 minutes of GPU time
36
+ def generate_video(
37
+ prompt: str,
38
+ steps: int,
39
+ frames: int,
40
+ fps: int,
41
+ resolution: str
42
+ ) -> str:
43
+ # 3.1 Parse & sanitize resolution
44
+ height, width = parse_resolution(resolution)
45
 
46
+ # 3.2 Run the diffusion pipeline
47
  output = pipe(
48
  prompt=prompt,
49
  num_inference_steps=steps,
 
51
  height=height,
52
  width=width
53
  )
54
+ video_frames = output.frames[0]
55
 
56
+ # 3.3 Export to MP4 (H.264) with chosen FPS
57
+ video_path = export_to_video(video_frames, "generated.mp4", fps=fps)
58
+ return video_path
59
 
60
+ # ────────────────────────────────────────────────────────────
61
+ # 4. Build the Gradio interface with interactive controls
62
+ # ────────────────────────────────────────────────────────────
63
+ with gr.Blocks(title="CogVideoX Text‑to‑Video Demo") as demo:
64
  gr.Markdown(
65
  """
66
+ # 🎞️ CogVideoX1.5‑5B Text‑to‑Video
67
+ Generate up to 10 s of video from your prompt.
68
+ Adjust inference steps, frame count, fps, and resolution below.
69
  """
70
  )
71
+ with gr.Row():
72
+ with gr.Column():
73
+ prompt_input = gr.Textbox(
74
+ label="Prompt",
75
+ placeholder="e.g., A futuristic city at dawn",
76
+ lines=2
77
+ )
78
+ steps_slider = gr.Slider(
79
+ minimum=1, maximum=100, step=1, value=50,
80
+ label="Inference Steps"
81
+ )
82
+ frames_slider = gr.Slider(
83
+ minimum=16, maximum=320, step=1, value=161,
84
+ label="Total Frames"
85
+ )
86
+ fps_slider = gr.Slider(
87
+ minimum=1, maximum=60, step=1, value=16,
88
+ label="Frames per Second (FPS)"
89
+ )
90
+ res_dropdown = gr.Dropdown(
91
+ choices=["360p", "480p", "720p", "1080p"],
92
+ value="480p",
93
+ label="Resolution"
94
+ )
95
+ gen_button = gr.Button("Generate Video")
96
+ with gr.Column():
97
+ video_output = gr.Video(
98
+ label="Generated Video",
99
+ format="mp4"
100
+ )
101
 
102
  gen_button.click(
103
  fn=generate_video,
104
+ inputs=[prompt_input, steps_slider, frames_slider, fps_slider, res_dropdown],
105
  outputs=video_output
106
  )
107
 
108
+ # ────────────────────────────────────────────────────────────
109
+ # 5. Launch: disable SSR so Gradio blocks and stays alive
110
+ # ────────────────────────────────────────────────────────────
111
  if __name__ == "__main__":
112
  demo.launch(
113
  server_name="0.0.0.0",