Spaces:

orderlymirror
/

ttv

Runtime error

App Files Files Community

orderlymirror committed on Apr 21, 2025

Commit

3704245

verified ·

1 Parent(s): 6fe1232

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -48

app.py CHANGED Viewed

@@ -4,27 +4,46 @@ import gradio as gr
 from diffusers import CogVideoXPipeline
 from diffusers.utils import export_to_video
-# 1. Load and optimize the CogVideoX pipeline with CPU offloading only
 pipe = CogVideoXPipeline.from_pretrained(
     "THUDM/CogVideoX1.5-5B",
     torch_dtype=torch.bfloat16
 )
-pipe.enable_model_cpu_offload()
-pipe.vae.enable_slicing()
-# 2. GPU-decorated generation function
-@spaces.GPU(duration=180)
-def generate_video(prompt: str, steps: int, frames: int, fps: int, resolution: str) -> str:
     """
-    Generates video from text with adjustable steps, frames, fps, and resolution.
-    Returns path to the saved MP4 file.
     """
-    # Parse resolution string (e.g., "720p" -> height=720)
-    height = int(resolution.rstrip('p'))
-    # Width is computed to maintain the model's aspect ratio (assumed 1360x768 -> 16:9)
-    width = int(height * (16/9))
-    # Run pipeline; offload handles device placement
     output = pipe(
         prompt=prompt,
         num_inference_steps=steps,
@@ -32,53 +51,63 @@ def generate_video(prompt: str, steps: int, frames: int, fps: int, resolution: s
         height=height,
         width=width
     )
-    frame_list = output.frames[0]
-    # Export to MP4 for browser playback
-    return export_to_video(frame_list, "generated.mp4", fps=fps)
-# 3. Build the Gradio interface with sliders and resolution dropdown
-with gr.Blocks(title="CogVideoX Interactive Text-to-Video") as demo:
     gr.Markdown(
         """
-        # 🎞️ Interactive Text‑to‑Video Demo
-        Adjust the sliders and select resolution to control the diffusion steps,
-        total frames (length), fps, and video resolution.
         """
     )
-    with gr.Column():
-        prompt_input = gr.Textbox(
-            label="Prompt",
-            placeholder="A serene forest at dawn",
-            lines=2
-        )
-        steps_slider = gr.Slider(
-            minimum=1, maximum=100, step=1, value=25,
-            label="Inference Steps"
-        )
-        frames_slider = gr.Slider(
-            minimum=16, maximum=320, step=1, value=161,
-            label="Total Frames (approx. length)"
-        )
-        fps_slider = gr.Slider(
-            minimum=1, maximum=60, step=1, value=16,
-            label="Frames per Second (fps)"
-        )
-        resolution_dropdown = gr.Dropdown(
-            choices=["360p", "480p", "720p", "1080p"],
-            value="480p",
-            label="Resolution"
-        )
-        gen_button = gr.Button("Generate Video")
-    video_output = gr.Video(label="Generated Video", format="mp4")
     gen_button.click(
         fn=generate_video,
-        inputs=[prompt_input, steps_slider, frames_slider, fps_slider, resolution_dropdown],
         outputs=video_output
     )
-# 4. Launch the app with SSR disabled
 if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",

 from diffusers import CogVideoXPipeline
 from diffusers.utils import export_to_video
+# ────────────────────────────────────────────────────────────
+# 1. Load & optimize the CogVideoX pipeline with CPU offload
+# ────────────────────────────────────────────────────────────
+# Load the pretrained "THUDM/CogVideoX1.5-5B" pipeline with bfloat16 weights.
 pipe = CogVideoXPipeline.from_pretrained(
     "THUDM/CogVideoX1.5-5B",
     torch_dtype=torch.bfloat16
 )
+pipe.enable_model_cpu_offload()   # auto move submodules between CPU/GPU
+pipe.vae.enable_slicing()         # slice VAE for extra VRAM savings
+# ────────────────────────────────────────────────────────────
+# 2. Resolution parsing & sanitization
+# ────────────────────────────────────────────────────────────
+def make_divisible_by_8(x: int) -> int:
+    """Round ``x`` down to the nearest multiple of 8."""
+    remainder = x % 8
+    return x - remainder
+def parse_resolution(res_str: str):
     """
+    Translate a label such as "480p" into a (height, width) pair.
+    The height is the integer left after stripping trailing "p" characters;
+    the width is its truncated 16:9 counterpart. Both values are then
+    rounded down to a multiple of 8 with make_divisible_by_8.
     """
+    height = int(res_str.rstrip("p"))
+    width = int(height * 16 / 9)
+    snapped = tuple(make_divisible_by_8(side) for side in (height, width))
+    return snapped
+# ────────────────────────────────────────────────────────────
+# 3. GPU‑decorated video generation function
+# ────────────────────────────────────────────────────────────
+@spaces.GPU(duration=600)  # allow up to 10 minutes of GPU time
+def generate_video(
+    prompt: str,
+    steps: int,
+    frames: int,
+    fps: int,
+    resolution: str
+) -> str:
+    # Run the text-to-video pipeline for `prompt` and return the value handed
+    # back by export_to_video (annotated here as a str path).
+    # `resolution` is a label such as "480p"; `steps` is forwarded as
+    # num_inference_steps and `fps` is forwarded to the exporter.
+    # NOTE(review): `frames` is not referenced in the lines visible in this
+    # diff; one line of the pipe(...) call is elided at the hunk boundary and
+    # presumably passes it as the frame count -- confirm against app.py.
+    # 3.1 Parse & sanitize resolution
+    height, width = parse_resolution(resolution)
+    # 3.2 Run the diffusion pipeline
     output = pipe(
         prompt=prompt,
         num_inference_steps=steps,
         height=height,
         width=width
     )
+    # Keep only the first entry of output.frames (one prompt was submitted).
+    video_frames = output.frames[0]
+    # 3.3 Export to MP4 (H.264) with chosen FPS
+    # The fixed name "generated.mp4" means each call writes to the same path.
+    video_path = export_to_video(video_frames, "generated.mp4", fps=fps)
+    return video_path
+# ────────────────────────────────────────────────────────────
+# 4. Build the Gradio interface with interactive controls
+# ────────────────────────────────────────────────────────────
+# Declare the UI: a Markdown header, a row holding a column of input controls
+# and a column with the video player, and the button-to-function wiring.
+with gr.Blocks(title="CogVideoX Text‑to‑Video Demo") as demo:
+    # NOTE(review): the header below promises "up to 10 s", but the sliders
+    # allow up to 320 frames at as little as 1 fps, so nothing here enforces
+    # that limit -- confirm the intended maximum length.
     gr.Markdown(
         """
+        # 🎞️ CogVideoX1.5‑5B Text‑to‑Video
+        Generate up to 10 s of video from your prompt.
+        Adjust inference steps, frame count, fps, and resolution below.
         """
     )
+    with gr.Row():
+        # First column: every input that generate_video receives.
+        with gr.Column():
+            prompt_input = gr.Textbox(
+                label="Prompt",
+                placeholder="e.g., A futuristic city at dawn",
+                lines=2
+            )
+            steps_slider = gr.Slider(
+                minimum=1, maximum=100, step=1, value=50,
+                label="Inference Steps"
+            )
+            frames_slider = gr.Slider(
+                minimum=16, maximum=320, step=1, value=161,
+                label="Total Frames"
+            )
+            fps_slider = gr.Slider(
+                minimum=1, maximum=60, step=1, value=16,
+                label="Frames per Second (FPS)"
+            )
+            # Choices are "<height>p" labels, the form parse_resolution expects.
+            res_dropdown = gr.Dropdown(
+                choices=["360p", "480p", "720p", "1080p"],
+                value="480p",
+                label="Resolution"
+            )
+            gen_button = gr.Button("Generate Video")
+        # Second column: player for the generated file.
+        with gr.Column():
+            video_output = gr.Video(
+                label="Generated Video",
+                format="mp4"
+            )
+    # The inputs list follows generate_video's parameter order:
+    # prompt, steps, frames, fps, resolution.
     gen_button.click(
         fn=generate_video,
+        inputs=[prompt_input, steps_slider, frames_slider, fps_slider, res_dropdown],
         outputs=video_output
     )
+# ────────────────────────────────────────────────────────────
+# 5. Launch: disable SSR so Gradio blocks and stays alive
+# ────────────────────────────────────────────────────────────
 if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",