| import gradio as gr |
| import torch |
| import spaces |
| import os |
| import tempfile |
| import random |
| from PIL import Image |
| from diffusers import AutoencoderKLWan, WanImageToVideoPipeline |
| from diffusers.utils import export_to_video |
| from transformers import CLIPVisionModel |
| from huggingface_hub import InferenceClient |
|
|
| |
# Hub access token read from the environment; stays None when HF_TOKEN is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Hub repository id that every model component below is loaded from.
MODEL_REPO = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
|
|
| |
# Hosted-inference client used to rewrite short motion descriptions into
# detailed prompts (see its chat_completion call in expand_video_prompt).
llm_client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
|
|
# System prompt for the prompt-expansion LLM. It constrains the model to
# describe motion only and to answer with the bare prompt text.
VIDEO_SYSTEM = """You are an expert at writing motion prompts for AI video generation using Wan I2V.

Your job: take a short description of desired motion/animation and expand it into a detailed video motion prompt.

Rules:
- Focus on MOTION — what moves, how it moves, camera movement
- Be specific: "hair gently blowing in breeze", "camera slowly pulls back", "eyes blink naturally"
- Keep subjects consistent with what is already in the image
- Describe lighting changes if relevant e.g. "light flickers softly"
- Do NOT describe the static image content — only the motion
- Return ONLY the prompt, no explanation, no preamble
- Keep under 80 words"""
|
|
def expand_video_prompt(raw_prompt):
    """Expand a short motion description into a detailed video motion prompt.

    The description is sent to ``llm_client`` together with the
    ``VIDEO_SYSTEM`` instructions. Expansion is best-effort: any failure
    falls back to the user's own (stripped) text.

    Args:
        raw_prompt: Motion description typed by the user. ``None``, empty,
            or whitespace-only input selects a generic default prompt.

    Returns:
        The expanded prompt with surrounding whitespace and quotes removed,
        the stripped input if the LLM call fails or returns nothing usable,
        or the generic default prompt when no description was given.
    """
    cleaned = (raw_prompt or "").strip()
    if not cleaned:
        return "subtle natural movement, gentle camera drift, cinematic atmosphere"

    try:
        response = llm_client.chat_completion(
            messages=[
                {"role": "system", "content": VIDEO_SYSTEM},
                {"role": "user", "content": f"Expand this motion description:\n{cleaned}"},
            ],
            max_tokens=150,
            temperature=0.6,
        )
        # Models often wrap the answer in quotes; peel them off.
        expanded = response.choices[0].message.content.strip().strip('"').strip("'").strip()
    except Exception as e:
        # Deliberate catch-all: a failed expansion must never block generation.
        print(f"LLM expansion failed, using raw prompt: {e}")
        return cleaned

    # An empty reply would send an empty prompt to the pipeline; use the
    # user's own words instead.
    return expanded or cleaned
|
|
| |
print("Loading Wan2.1 I2V pipeline...")

# The VAE and the CLIP image encoder are loaded on their own in float32 and
# then handed to the pipeline, which is itself requested in bfloat16.
vae = AutoencoderKLWan.from_pretrained(MODEL_REPO, subfolder="vae", torch_dtype=torch.float32)
image_encoder = CLIPVisionModel.from_pretrained(
    MODEL_REPO, subfolder="image_encoder", torch_dtype=torch.float32
)

pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_REPO,
    vae=vae,
    image_encoder=image_encoder,
    torch_dtype=torch.bfloat16,
)

# Let diffusers manage device placement of the sub-models instead of moving
# the whole pipeline to the GPU up front.
pipe.enable_model_cpu_offload()
print("Pipeline ready.")
|
|
| |
# Negative prompt passed to every generation call: traits the video should avoid.
VIDEO_NEG = ", ".join(
    [
        "static",
        "no movement",
        "blurry",
        "low quality",
        "worst quality",
        "inconsistent motion",
        "flickering",
        "jitter",
        "artifacts",
        "watermark",
        "text",
        "deformed",
    ]
)
|
|
| |
@spaces.GPU(duration=300)
def generate_video(input_image, motion_prompt, num_frames, guidance, seed, randomize):
    """Animate a still image into a short MP4 clip with the Wan I2V pipeline.

    Args:
        input_image: Uploaded image as a NumPy array, or ``None`` when
            nothing was uploaded.
        motion_prompt: Short description of the desired motion; it is
            expanded with ``expand_video_prompt`` before use.
        num_frames: Number of frames to generate (cast to ``int``).
        guidance: Guidance scale (cast to ``float``).
        seed: Seed used when ``randomize`` is false.
        randomize: When true, ignore ``seed`` and draw a random one.

    Returns:
        A tuple ``(video_path, seed, info_markdown)``: the path of the
        written ``.mp4`` file, the seed actually used, and a Markdown string
        showing the prompt that was sent to the model.

    Raises:
        gr.Error: If no input image was provided.
    """
    if input_image is None:
        raise gr.Error("Please upload an image first.")

    # A cleared seed field may arrive as None; treat it like "random" rather
    # than crashing in int().
    if randomize or seed is None:
        seed = random.randint(0, 2**32 - 1)
    seed = int(seed)

    expanded_motion = expand_video_prompt(motion_prompt)
    print(f"Expanded motion: {expanded_motion}")

    # Resize to one of two fixed 480P resolutions chosen by orientation.
    # NOTE(review): this stretches images whose aspect ratio differs from the
    # target instead of cropping them; confirm that is intended.
    img = Image.fromarray(input_image).convert("RGB")
    orig_w, orig_h = img.size
    if orig_w / orig_h >= 1:
        new_w, new_h = 832, 480
    else:
        new_w, new_h = 480, 832
    img = img.resize((new_w, new_h), Image.LANCZOS)

    generator = torch.Generator(device="cpu").manual_seed(seed)

    output = pipe(
        image=img,
        prompt=expanded_motion,
        negative_prompt=VIDEO_NEG,
        height=new_h,
        width=new_w,
        num_frames=int(num_frames),
        guidance_scale=float(guidance),
        num_inference_steps=30,
        generator=generator,
    )
    frames = output.frames[0]

    # Reserve a unique path and close the handle right away, so the
    # descriptor is not leaked and the exporter writes to a file nobody else
    # holds open. delete=False keeps the file on disk for the caller.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name
    export_to_video(frames, video_path, fps=16)

    return video_path, seed, f"**Motion prompt sent to model:**\n\n{expanded_motion}"
|
|
| |
# Stylesheet handed to gr.Blocks(css=...). The custom class names defined here
# (.topbar, .gpu-pill, .upload-area, .video-out, .card, .card-label,
# .seed-out, .hint-box, .gen-btn) are attached to components through
# elem_classes or used in the raw HTML snippets of the UI below.
css = """
* { box-sizing: border-box; margin: 0; padding: 0; }

body, .gradio-container {
    background: #07070e !important;
    font-family: 'Inter', system-ui, sans-serif !important;
    max-width: 500px !important;
    margin: 0 auto !important;
    padding: 8px !important;
}

.topbar {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: 10px 2px 14px;
}
.topbar-title {
    color: #e8e0ff;
    font-size: 0.95em;
    font-weight: 800;
}
.gpu-pill {
    background: #1aff7a18;
    border: 1px solid #1aff7a44;
    color: #1aff7a;
    font-size: 0.6em;
    font-weight: 800;
    padding: 4px 12px;
    border-radius: 20px;
    letter-spacing: 1.5px;
    text-transform: uppercase;
}

.upload-area {
    background: #0d0d1a;
    border: 2px dashed #1e1e35;
    border-radius: 18px;
    overflow: hidden;
    margin-bottom: 8px;
    min-height: 260px;
    display: flex;
    align-items: center;
    justify-content: center;
}

.video-out {
    background: #0d0d1a;
    border: 1px solid #16162a;
    border-radius: 18px;
    overflow: hidden;
    margin-bottom: 8px;
    min-height: 260px;
}

.card {
    background: #0d0d1a;
    border: 1px solid #16162a;
    border-radius: 14px;
    padding: 14px;
    margin-bottom: 8px;
}
.card-label {
    color: #3d3060;
    font-size: 0.62em;
    font-weight: 800;
    text-transform: uppercase;
    letter-spacing: 2px;
    margin-bottom: 8px;
}

textarea {
    background: transparent !important;
    border: none !important;
    color: #c8b8f0 !important;
    font-size: 15px !important;
    line-height: 1.6 !important;
    padding: 0 !important;
    resize: none !important;
    box-shadow: none !important;
    width: 100% !important;
    outline: none !important;
}
textarea::placeholder { color: #252038 !important; }
textarea:focus {
    outline: none !important;
    box-shadow: none !important;
    border: none !important;
}

.gradio-accordion {
    background: #0d0d1a !important;
    border: 1px solid #16162a !important;
    border-radius: 14px !important;
    margin-bottom: 8px !important;
    overflow: hidden !important;
}
.gradio-accordion .label-wrap button {
    color: #4a3a6a !important;
    font-size: 0.72em !important;
    font-weight: 700 !important;
    text-transform: uppercase !important;
    letter-spacing: 1.5px !important;
    padding: 12px 16px !important;
}

.gradio-slider {
    background: transparent !important;
    border: none !important;
    padding: 4px 0 10px !important;
}
input[type=range] {
    accent-color: #3366bb !important;
    width: 100% !important;
}

input[type=number] {
    background: #0a0a14 !important;
    border: 1px solid #18182a !important;
    border-radius: 10px !important;
    color: #7799cc !important;
    font-size: 13px !important;
    padding: 8px 10px !important;
}

input[type=checkbox] { accent-color: #3366bb !important; }
.gradio-checkbox label span {
    color: #4a3a6a !important;
    font-size: 0.75em !important;
    font-weight: 600 !important;
}

label > span:first-child {
    color: #3a2d55 !important;
    font-size: 0.7em !important;
    font-weight: 700 !important;
    text-transform: uppercase !important;
    letter-spacing: 1px !important;
}

.seed-out input[type=number] {
    background: transparent !important;
    border: none !important;
    color: #2e2848 !important;
    font-size: 0.7em !important;
    text-align: center !important;
    padding: 2px !important;
}

.hint-box {
    background: #0a0a14;
    border: 1px solid #111122;
    border-radius: 10px;
    padding: 10px 14px;
    color: #443366;
    font-size: 0.72em;
    line-height: 1.7;
    margin-bottom: 8px;
    word-break: break-word;
}

.gen-btn button {
    background: linear-gradient(135deg, #1a3aaa 0%, #0e1e77 100%) !important;
    border: 1px solid #2255cc !important;
    border-radius: 14px !important;
    color: #fff !important;
    font-size: 0.88em !important;
    font-weight: 900 !important;
    padding: 17px !important;
    width: 100% !important;
    letter-spacing: 2px !important;
    text-transform: uppercase !important;
    box-shadow: 0 4px 24px #1a3aaa55 !important;
    transition: all 0.15s ease !important;
    margin-top: 6px !important;
}
.gen-btn button:hover {
    box-shadow: 0 6px 32px #1a3aaa99 !important;
    transform: translateY(-1px) !important;
}
.gen-btn button:active {
    transform: scale(0.98) !important;
    box-shadow: 0 2px 12px #1a3aaa33 !important;
}

footer, .built-with { display: none !important; }
"""
|
|
| |
# Single-column UI: image upload, motion prompt, generate button, outputs,
# and a collapsed settings panel. All inputs feed generate_video.
with gr.Blocks(css=css, title="VideoGen") as demo:

    # Header bar.
    gr.HTML("""
    <div class="topbar">
        <span class="topbar-title">🎬 Wan I2V — Image to Video</span>
        <span class="gpu-pill">⚡ ZeroGPU</span>
    </div>
    """)

    # Usage hint.
    gr.HTML("""
    <div class="hint-box">
        Upload any image → describe the motion → get a ~3–5 second 480P video.<br><br>
        <strong>Motion tips:</strong> describe what moves, not what's in the image.<br>
        e.g. <em>"hair gently blowing, eyes blink, camera slowly pulls back"</em>
    </div>
    """)

    # type="numpy" matches Image.fromarray() in generate_video.
    input_image = gr.Image(
        label="Input Image",
        type="numpy",
        height=300,
        elem_classes="upload-area",
    )

    # Each gr.HTML renders its own fragment, so a <div> opened in one and
    # closed in another never wraps the components in between. A layout
    # container carrying the "card" class nests the textbox properly.
    with gr.Column(elem_classes="card"):
        gr.HTML('<div class="card-label">✦ Motion — what should move?</div>')
        motion_prompt = gr.Textbox(
            show_label=False,
            placeholder="hair gently blowing, eyes blinking slowly, soft light shimmer...",
            lines=2,
        )

    generate_btn = gr.Button(
        "Generate Video ✦",
        variant="primary",
        size="lg",
        elem_classes="gen-btn",
    )

    output_video = gr.Video(
        label="Generated Video",
        elem_classes="video-out",
        height=300,
    )

    # Read-only echo of the seed that was actually used.
    used_seed = gr.Number(
        label="seed",
        interactive=False,
        elem_classes="seed-out",
    )

    # Shows the expanded prompt returned by generate_video.
    expanded_out = gr.Markdown(
        value="",
        elem_classes="hint-box",
    )

    with gr.Accordion("⚙️ Settings", open=False):
        gr.HTML('<div style="height:6px"></div>')

        # Steps of 16 from 17 give 17, 33, 49, 65, 81 frames.
        num_frames = gr.Slider(
            minimum=17,
            maximum=81,
            value=49,
            step=16,
            label="Frames — 17≈1s 49≈3s 81≈5s (at 16fps)",
        )
        guidance = gr.Slider(
            minimum=1.0,
            maximum=10.0,
            value=5.0,
            step=0.5,
            label="Guidance Scale",
        )
        with gr.Row():
            seed = gr.Number(
                label="Seed",
                value=42,
                precision=0,
                minimum=0,
                maximum=2**32 - 1,
                scale=3,
            )
            randomize = gr.Checkbox(
                label="Random seed",
                value=True,
                scale=1,
            )

    # Input order must match the generate_video signature.
    generate_btn.click(
        fn=generate_video,
        inputs=[
            input_image,
            motion_prompt,
            num_frames,
            guidance,
            seed,
            randomize,
        ],
        outputs=[output_video, used_seed, expanded_out],
    )


demo.launch()
|
|