"""Hugging Face Spaces app: text-to-video generation with diffusers + Gradio.

Loads a DiffusionPipeline once at import time (with CPU-offload / VAE-slicing
VRAM optimizations), exposes a gr.Blocks UI, and runs generation inside a
ZeroGPU-allocated context via the @spaces.GPU decorator.
"""

import os
import tempfile

import gradio as gr
import spaces  # Critical for ZeroGPU support in Spaces
import torch
from diffusers import DiffusionPipeline  # Common for HF video models
from diffusers.utils import export_to_video
from PIL import Image

# =============================================================
# INITIALIZATION & PIPELINE CONFIGURATION
# =============================================================

# NOTE(review): "MiniMaxAI/MiniMax-M1" is a language-model repo, not a
# diffusers video pipeline -- confirm the intended model id before deploy.
MODEL_ID = "MiniMaxAI/MiniMax-M1"


def load_pipeline():
    """Load the diffusion pipeline with VRAM optimizations and return it.

    Returns:
        DiffusionPipeline: pipeline in bfloat16 with CPU offload and VAE
        slicing enabled, suitable for 24GB GPUs or ZeroGPU.
    """
    # bfloat16 halves weight memory on modern GPUs.
    # Note: ensure the model is available on the HF Hub or adjust the path.
    pipe = DiffusionPipeline.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        use_safetensors=True,
    )
    # Critical VRAM optimizations for deployment on 24GB or ZeroGPU.
    pipe.enable_model_cpu_offload()
    pipe.enable_vae_slicing()
    return pipe


# Pipeline is initialized globally to avoid reloads on every click.
pipe = load_pipeline()

# =============================================================
# GENERATION LOGIC
# =============================================================


@spaces.GPU(duration=300)  # Allocated GPU time for complex generation
def generate_video(prompt, negative_prompt, steps, guidance_scale, seed):
    """Generate a short video from a text prompt and return its file path.

    Args:
        prompt: text description of the scene/action.
        negative_prompt: concepts to steer away from.
        steps: number of inference steps (coerced to int).
        guidance_scale: classifier-free guidance strength (coerced to float).
        seed: RNG seed for deterministic output (coerced to int).

    Returns:
        str: path to a temporary .mp4 file containing the generated video.
    """
    # Standard 5-second video length at 16 FPS (16 * 5 + 1 frames).
    num_frames = 81

    # Seeded CUDA generator for deterministic results.
    generator = torch.Generator("cuda").manual_seed(int(seed))

    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(steps),
        guidance_scale=float(guidance_scale),
        num_frames=num_frames,
        generator=generator,
    )
    # Diffusers video pipelines return a *batched* `.frames`; export the
    # first (and only) video's frame sequence, not the batch container.
    frames = result.frames[0]

    # NamedTemporaryFile(delete=False) instead of the deprecated and
    # race-prone tempfile.mktemp; the file persists for Gradio to serve.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        temp_path = tmp.name
    export_to_video(frames, temp_path, fps=16)
    return temp_path


# =============================================================
# UI DESIGN (Using gr.Blocks for Professional Layout)
# =============================================================

# CSS for custom styling to improve the user experience.
css = """
.container { max-width: 1000px; margin: auto; }
.gen_btn { background-color: #7224f2 !important; color: white !important; }
"""

with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# {MODEL_ID} High-Fidelity Video Generator")
    gr.Markdown("Leveraging ZeroGPU and VRAM offloading for cinematic AI video [2, 28].")

    with gr.Row():
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Describe the action and scene details...",
                lines=4,
            )
            neg_prompt = gr.Textbox(
                label="Negative Prompt",
                value="blurry, distorted, low quality, watermark, text",
            )
            with gr.Accordion("Advanced Settings", open=False):
                steps = gr.Slider(20, 50, value=30, step=1, label="Inference Steps")
                guidance = gr.Slider(1.0, 15.0, value=7.0, label="Guidance Scale")
                seed = gr.Number(value=42, label="Seed")
            generate_btn = gr.Button(
                "Generate Video", variant="primary", elem_classes="gen_btn"
            )
        with gr.Column(scale=1):
            video_output = gr.Video(label="Generated Output")

    # Wire the button to the GPU-decorated generation function.
    generate_btn.click(
        fn=generate_video,
        inputs=[prompt_input, neg_prompt, steps, guidance, seed],
        outputs=video_output,
    )

# =============================================================
# LAUNCH
# =============================================================

if __name__ == "__main__":
    # Ensure app.py is at the root for automatic HF Spaces detection.
    demo.launch()