File size: 8,551 Bytes
0aaa0e7
3175b6a
adc71a4
 
 
 
84cb47f
adc71a4
66d6b8c
 
 
 
 
0aaa0e7
adc71a4
 
 
3175b6a
a2f9216
 
 
 
 
 
 
 
 
 
 
 
 
3175b6a
adc71a4
3175b6a
adc71a4
a2f9216
adc71a4
 
 
89682a1
adc71a4
 
 
 
 
 
89682a1
a2f9216
 
 
adc71a4
 
 
a2f9216
adc71a4
e5d46ae
3175b6a
 
adc71a4
 
 
 
 
a2f9216
adc71a4
 
 
3175b6a
 
adc71a4
3175b6a
 
adc71a4
3175b6a
adc71a4
2015b5e
 
66c8799
2015b5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adc71a4
a2f9216
 
 
 
 
 
 
66c8799
 
733b46b
a2f9216
 
 
adc71a4
a2f9216
d2d418c
a2f9216
adc71a4
 
 
e5d46ae
adc71a4
 
 
a2f9216
adc71a4
79c96a7
adc71a4
 
 
 
 
 
e5d46ae
adc71a4
 
 
f400fe4
adc71a4
f400fe4
adc71a4
 
9ad3692
 
adc71a4
9ad3692
 
adc71a4
 
9ad3692
 
733b46b
9ad3692
adc71a4
 
733b46b
a2f9216
733b46b
 
a2f9216
 
 
 
 
 
733b46b
adc71a4
 
 
733b46b
adc71a4
9ad3692
733b46b
9ad3692
733b46b
9ad3692
adc71a4
 
733b46b
 
 
adc71a4
a2f9216
733b46b
 
a2f9216
733b46b
 
9ad3692
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import gradio as gr
import torch
import spaces
import requests
import time
import os
from PIL import Image
from lumaai import LumaAI
from diffusers import (
    StableDiffusionXLControlNetPipeline, 
    StableDiffusionXLControlNetImg2ImgPipeline, 
    ControlNetModel
)

# πŸ”‘ Fetch Keys from Hugging Face Secrets
LUMA_API_KEY = os.getenv("LUMAAI_API_KEY")
luma_client = LumaAI(auth_token=LUMA_API_KEY)

# ==========================================
# ART STYLE PROMPT DICTIONARY
# ==========================================
# Maps a human-readable style name (shown in the UI dropdown) to a dense
# prompt fragment. The fragment is appended to both the image prompt
# (generate_frames) and the video prompt (master_pipeline) so the stills
# and the animation share one aesthetic. Keys feed the Dropdown choices;
# values are runtime strings — do not edit casually.
ART_STYLES = {
    "Spiderverse": "Spider-Man Into the Spider-Verse art style, vibrant comic book aesthetic, halftone dot patterns, chromatic aberration, offset CMYK printing misregistration, sharp stylized linework, vibrant neon colors, dynamic 2D/3D hybrid animation look",
    "Arcane": "Arcane animated series art style, Fortiche production, 3D models with hand-painted 2D textures, visible brushstrokes, dramatic chiaroscuro lighting, moody atmospheric volumetric glow, stylized realism, steampunk hextech vibe",
    "Studio Ghibli": "Studio Ghibli animation style, Hayao Miyazaki, beautiful hand-drawn cel-shaded anime, lush watercolor backgrounds, soft warm lighting, vibrant nostalgic nature, clean linework, highly detailed whimsical environments",
    "90s Cyberpunk Anime": "1990s Cyberpunk anime style, Akira and Ghost in the Shell aesthetic, glowing neon city lights, holographic projections, cybernetic enhancements, highly detailed retro-futuristic cel-shaded animation, dark dystopian atmosphere, sharp stylized shadows",
    "Studio Claymation": "Premium claymation stop-motion animation, Laika Studio style, tangible tactile clay textures, visible fingerprints, miniature practical sets, dramatic cinematic studio lighting, macro photography depth of field, physical materials",
    "Dark Fantasy": "Grimdark fantasy art style, Elden Ring and Dark Souls aesthetic, gothic architecture, intricate weathered details, moody atmospheric fog, desaturated color palette with striking focal lighting, epic scale, ominous, ethereal volumetric rays",
    "Retro VHS Sci-Fi": "1980s retro sci-fi aesthetic, VHS tape degradation, CRT monitor scanlines, synthwave color palette with hot pinks and electric blues, heavy lens flares, analog glitch effects, grainy film texture, nostalgic glowing high-contrast lighting"
}

# ==========================================
# PHASE 1: GPU DRAWING (ZeroGPU)
# ==========================================
@spaces.GPU(duration=60)
def generate_frames(sketch_1, sketch_2, image_prompt, style_prompt, ctrl_scale, consistency):
    """Stylize two user sketches into matching start/end frames on ZeroGPU.

    Args:
        sketch_1: PIL image of the start sketch (any mode; converted to RGB).
        sketch_2: PIL image of the end sketch.
        image_prompt: user's subject description.
        style_prompt: one of the ART_STYLES prompt fragments.
        ctrl_scale: ControlNet conditioning scale — how strictly the output
            follows the sketch lines.
        consistency: img2img strength for the second frame — higher keeps the
            first frame's look more intact. TODO confirm: diffusers `strength`
            semantics mean HIGHER values allow MORE change; verify the slider
            label matches the intended direction.

    Returns:
        (img_1, img_2): two 1024x1024 PIL images in the requested style.

    NOTE(review): models are re-downloaded/re-loaded on every call; acceptable
    on ZeroGPU (fresh workers) but wasteful on a persistent GPU.
    """
    print("🎨 Initializing GPU Pipelines...")
    dtype = torch.float16
    controlnet = ControlNetModel.from_pretrained("xinsir/controlnet-scribble-sdxl-1.0", torch_dtype=dtype)
    
    # Load Base Image Pipe
    pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
        "Lykon/dreamshaper-xl-1-0", 
        controlnet=controlnet, 
        torch_dtype=dtype
    ).to("cuda")
    
    # Combine user prompt with the detailed style prompt
    full_prompt = f"stunning digital illustration, {image_prompt}, {style_prompt}"
    
    # --- Start Frame ---
    # NEAREST resize preserves hard sketch edges for the scribble ControlNet.
    s1 = sketch_1.convert("RGB").resize((1024, 1024), Image.NEAREST)
    img_1 = pipe(
        prompt=full_prompt, 
        image=s1, 
        controlnet_conditioning_scale=float(ctrl_scale)
    ).images[0]
    
    # --- End Frame ---
    # We swap to Img2Img to ensure the character doesn't change between frames
    # (reuses the already-loaded components, so no extra VRAM for new weights).
    i2i_pipe = StableDiffusionXLControlNetImg2ImgPipeline(**pipe.components).to("cuda")
    s2 = sketch_2.convert("RGB").resize((1024, 1024), Image.NEAREST)
    img_2 = i2i_pipe(
        prompt=full_prompt, 
        image=img_1, 
        control_image=s2, 
        strength=float(consistency)
    ).images[0]
    
    return img_1, img_2

# ==========================================
# PHASE 2: CPU ORCHESTRATION (API & UI)
# ==========================================
def upload_to_web(image):
    """Upload a PIL image to freeimage.host and return its public URL.

    The image is written to a temporary JPEG on disk because the upload
    endpoint expects a multipart file.

    Args:
        image: PIL image to publish (converted to RGB before saving).

    Returns:
        str: direct URL of the hosted image.

    Raises:
        gr.Error: on timeout, network/HTTP failure, or an unexpected
            response payload.
    """
    filename = f"temp_frame_{int(time.time() * 1000)}.jpg"
    image.convert("RGB").save(filename, format="JPEG", quality=90)

    try:
        with open(filename, "rb") as file:
            response = requests.post(
                "https://freeimage.host/api/1/upload",
                data={
                    "key": "6d207e02198a847aa98d0a2a901485a5", 
                    "action": "upload"
                },
                files={"source": file},
                timeout=30 
            )
        # Surface HTTP-level failures (4xx/5xx) instead of failing later
        # on a missing JSON key.
        response.raise_for_status()
        payload = response.json()
        try:
            return payload["image"]["url"]
        except (KeyError, TypeError):
            raise gr.Error(f"☁️ Image host returned an unexpected response: {payload}")
    except requests.exceptions.Timeout:
        raise gr.Error("☁️ Image host took too long to respond. Please try again.")
    except gr.Error:
        # Re-raise our own errors untouched so they aren't double-wrapped below.
        raise
    except Exception as e:
        raise gr.Error(f"☁️ Image upload failed: {str(e)}")
    finally:
        # BUGFIX: the temp file previously leaked on every failure path
        # (cleanup only ran on the success path). finally guarantees removal.
        if os.path.exists(filename):
            os.remove(filename)

def master_pipeline(sketch_1, sketch_2, image_prompt, video_prompt, selected_style, ctrl_scale, consistency):
    """End-to-end orchestration: stylize sketches on GPU, upload, animate via Luma.

    Args:
        sketch_1, sketch_2: PIL start/end sketches from the UI.
        image_prompt: subject description for the still frames.
        video_prompt: motion description for the animation.
        selected_style: key into ART_STYLES.
        ctrl_scale: sketch strictness, forwarded to generate_frames.
        consistency: img2img strength, forwarded to generate_frames.

    Returns:
        (img_1, img_2, video_url): the two stylized frames and the rendered
        video asset URL from Luma.

    Raises:
        gr.Error: on missing inputs, missing API key, unknown style,
            Luma failure, or rendering timeout.
    """
    # Armor: Prevent crashes if the user forgets inputs
    if sketch_1 is None or sketch_2 is None:
        raise gr.Error("🖼️ Please upload both the Start and End sketches!")
    if not image_prompt or not video_prompt:
        raise gr.Error("✍️ Please provide both an Image Prompt and a Video Prompt!")
        
    if not LUMA_API_KEY:
        raise gr.Error("🔑 Luma API Key missing! Add it to Space Secrets.")
        
    # Fetch the highly descriptive prompt block.
    # BUGFIX: guard the lookup — an unknown dropdown value previously
    # surfaced as a raw KeyError instead of a user-facing error.
    try:
        style_prompt = ART_STYLES[selected_style]
    except KeyError:
        raise gr.Error(f"🎨 Unknown animation style: {selected_style}")
        
    # 1. Generate Images on ZeroGPU
    img_1, img_2 = generate_frames(sketch_1, sketch_2, image_prompt, style_prompt, ctrl_scale, consistency)
    
    # 2. Upload to FreeImage.host (CPU) — Luma needs public URLs for keyframes
    print("☁️ Uploading to cloud...")
    url_1 = upload_to_web(img_1)
    url_2 = upload_to_web(img_2)
    
    # 3. Request Luma Video (CPU)
    print("🎬 Requesting Luma Ray-2 Animation...")
    generation = luma_client.generations.create(
        prompt=f"Cinematic stylized animation, {video_prompt}, {style_prompt}",
        model="ray-2",
        duration="5s",
        resolution="540p",
        keyframes={
            "frame0": {"type": "image", "url": url_1},
            "frame1": {"type": "image", "url": url_2}
        }
    )
    
    # 4. Poll Luma until the job resolves.
    # BUGFIX: the loop was unbounded (`while True`); a stuck generation
    # would spin this worker forever. Enforce a hard 10-minute deadline.
    deadline = time.time() + 600
    while time.time() < deadline:
        res = luma_client.generations.get(id=generation.id)
        if res.state == "completed":
            return img_1, img_2, res.assets.video
        if res.state == "failed":
            raise gr.Error(f"Luma API Error: {res.failure_reason}")
        time.sleep(5)
    raise gr.Error("⏳ Luma rendering timed out after 10 minutes. Please try again.")

# ==========================================
# 5. GRADIO UI
# ==========================================
# Gradio UI: two-column layout — inputs (sketches, prompts, style, settings)
# on the left, generated frames and the final video on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ✏️ Sketch-to-Motion")
    gr.Markdown("Transform sketches into professional animations using local ZeroGPU stylization and Luma cloud rendering.")
    
    with gr.Row():
        # --- INPUT COLUMN ---
        with gr.Column():
            # type="pil" so master_pipeline receives PIL images directly
            s1 = gr.Image(type="pil", label="Start Sketch (Black & White)")
            s2 = gr.Image(type="pil", label="End Sketch (Black & White)")
            
            # Separated Prompts & Style Dropdown
            img_prompt = gr.Textbox(label="Image Prompt", placeholder="e.g., Plague doctor, bird mask, cinematic lighting")
            vid_prompt = gr.Textbox(label="Video Prompt", placeholder="e.g., The plague doctor's mask snaps open and closed, chanting")
            style_dropdown = gr.Dropdown(
                choices=list(ART_STYLES.keys()), 
                value="Arcane", 
                label="Animation Style", 
                info="Select the visual aesthetic for your generation."
            )
            
            with gr.Accordion("Settings", open=False):
                # sc -> ctrl_scale (ControlNet conditioning), cs -> consistency
                # (img2img strength) in generate_frames
                sc = gr.Slider(0, 2, 0.6, label="Sketch Strictness")
                cs = gr.Slider(0.5, 1, 0.85, label="Color Consistency")
                
            btn = gr.Button("Generate Animation", variant="primary")
            
        # --- OUTPUT COLUMN ---
        with gr.Column():
            # Frames moved to the top
            with gr.Row():
                o1 = gr.Image(label="Start Frame")
                o2 = gr.Image(label="End Frame")
                
            # Video moved below the frames
            vid = gr.Video(label="Final Video")

    # Updated click event to include the dropdown
    # Input order must match master_pipeline's signature exactly.
    btn.click(
        fn=master_pipeline, 
        inputs=[s1, s2, img_prompt, vid_prompt, style_dropdown, sc, cs], 
        outputs=[o1, o2, vid]
    )

demo.launch()