# Spaces:
#
# akhaliq
# /
#
# anycoder-355bd392
#
# Running on Zero
#
# File size: 9,179 Bytes

import gradio as gr
import torch
import spaces
import time
from diffusers import HunyuanVideo15ImageToVideoPipeline
from diffusers.utils import export_to_video, load_image
from PIL import Image
import os
import tempfile

# Model configuration: weights are loaded in bfloat16, and inference targets
# the first CUDA device when torch reports one, falling back to the CPU.
dtype = torch.bfloat16
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Module-level slot for the pipeline; stays None until load_model() fills it.
pipe = None

def load_model():
    """Return the shared HunyuanVideo pipeline, creating it on the first call.

    The pipeline is stored in the module-level ``pipe`` variable, so later
    calls hand back the same object without loading anything again.
    """
    global pipe

    # Fast path: a previous call already built the pipeline.
    if pipe is not None:
        return pipe

    pipe = HunyuanVideo15ImageToVideoPipeline.from_pretrained(
        "tencent/HunyuanVideo-1.5",
        variant="480p_i2v_step_distilled",
        torch_dtype=dtype,
    )
    # Same post-load configuration as before: model CPU offload, then VAE tiling.
    pipe.enable_model_cpu_offload()
    pipe.vae.enable_tiling()
    return pipe

@spaces.GPU(duration=120)
def generate_video(image, prompt, seed=1, num_frames=121, num_inference_steps=50, fps=24):
    """
    Generate video from image and prompt using official HunyuanVideo-1.5

    Args:
        image: Starting frame. A string is handed to ``load_image``; any other
            value is passed to the pipeline unchanged.
        prompt: Text description of the desired video.
        seed: Seed for the torch random generator.
        num_frames: Number of frames requested from the pipeline.
        num_inference_steps: Number of inference steps requested from the pipeline.
        fps: Frame rate used when exporting the video file.

    Returns:
        Path of the temporary ``.mp4`` file containing the exported video.

    Raises:
        gr.Error: If the image or prompt is missing, a numeric setting cannot
            be converted to an integer, or generation/export fails.
    """
    if image is None:
        raise gr.Error("Please upload an image first!")

    # Guard against a None prompt as well as an empty/whitespace-only one.
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt!")

    # UI number widgets may deliver floats (or None when the field is cleared),
    # while torch.Generator.manual_seed and the frame/step counts need ints.
    try:
        seed = int(seed)
        num_frames = int(num_frames)
        num_inference_steps = int(num_inference_steps)
        fps = int(fps)
    except (TypeError, ValueError, OverflowError) as e:
        raise gr.Error(
            "Seed, number of frames, inference steps and FPS must be numbers."
        ) from e

    output_path = None
    try:
        # Load model
        pipe = load_model()

        # Create generator with seed
        generator = torch.Generator(device=device).manual_seed(seed)

        # Load and process image
        if isinstance(image, str):
            input_image = load_image(image)
        else:
            input_image = image

        # Generate video
        with torch.inference_mode():
            video_frames = pipe(
                prompt=prompt,
                image=input_image,
                generator=generator,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps,
            ).frames[0]

        # Create temporary file for output; delete=False keeps it on disk
        # after the handle is closed so the path can be returned to the caller.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            output_path = tmp_file.name

        # Export video
        export_to_video(video_frames, output_path, fps=fps)

        return output_path

    except Exception as e:
        # Best-effort removal of the temp file when export did not complete,
        # so failed runs do not accumulate orphaned files.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
        # Chain the original exception so the real traceback stays visible.
        raise gr.Error(f"Error generating video: {str(e)}") from e
    finally:
        # Cleanup GPU memory
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

def create_examples():
    """Build the example rows shown in the UI.

    Every row is ``[image, prompt, seed, num_frames, num_inference_steps, fps]``.
    All rows share one image URL and the same frame/step/fps settings; only
    the prompt and the seed differ.
    """
    image_url = "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/wan_i2v_input.JPG"
    # num_frames, num_inference_steps, fps — identical for every example.
    shared_settings = [121, 50, 24]
    prompt_seed_pairs = (
        (
            "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside.",
            1,
        ),
        ("A majestic eagle soaring through mountain peaks at sunset", 42),
        ("Anime style, a girl with pink hair dancing in cherry blossom petals", 123),
    )

    return [
        [image_url, prompt_text, seed_value, *shared_settings]
        for prompt_text, seed_value in prompt_seed_pairs
    ]

# Custom theme
# Step 1: start from the Soft theme with the app's palette, font and sizing.
custom_theme = gr.themes.Soft(
    font=gr.themes.GoogleFont("Inter"),
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    text_size="lg",
    spacing_size="lg",
    radius_size="md",
)
# Step 2: override the primary-button fills and the block title weight.
custom_theme = custom_theme.set(
    block_title_text_weight="600",
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
)

# UI layout: inputs and settings in the left column, the generated video and a
# status line in the right column, then examples and usage notes underneath.
with gr.Blocks() as demo:
    # Header with "Built with anycoder" link
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 20px;">
        <h1>🎬 Image to Video Generator</h1>
        <p style="color: #666;">Transform static images into dynamic videos using Official HunyuanVideo-1.5</p>
        <p style="margin-top: 10px;">
            <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #0066cc; text-decoration: none;">
                Built with anycoder
            </a>
        </p>
    </div>
    """)

    with gr.Row(equal_height=True):
        with gr.Column(scale=1):
            gr.Markdown("### 📸 Input Image")
            input_image = gr.Image(
                label="Upload Image",
                type="pil",
                height=300,
                sources=["upload", "webcam", "clipboard"]
            )

            gr.Markdown("### ✍️ Prompt")
            input_prompt = gr.Textbox(
                label="Describe the video you want to generate",
                placeholder="Describe the motion, style, and content...",
                lines=4,
                max_lines=6
            )

            with gr.Accordion("⚙️ Advanced Settings", open=False):
                # precision=0 makes the component deliver an integer; the seed
                # is fed to torch.Generator.manual_seed, which needs an int.
                seed = gr.Number(
                    label="Seed",
                    value=1,
                    minimum=0,
                    maximum=999999,
                    step=1,
                    precision=0,
                    info="Random seed for reproducible results"
                )

                num_frames = gr.Slider(
                    label="Number of Frames",
                    minimum=49,
                    maximum=121,
                    value=121,
                    step=1,
                    info="Higher values = longer videos"
                )

                num_inference_steps = gr.Slider(
                    label="Inference Steps",
                    minimum=20,
                    maximum=100,
                    value=50,
                    step=1,
                    info="Higher values = better quality but slower"
                )

                fps = gr.Slider(
                    label="FPS",
                    minimum=12,
                    maximum=30,
                    value=24,
                    step=1,
                    info="Frames per second for output video"
                )

            generate_btn = gr.Button(
                "🎬 Generate Video",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=1):
            gr.Markdown("### 🎥 Generated Video")
            # NOTE(review): the explicit `show_download_button=True` flag was
            # dropped. This file targets Gradio 6 (theme/css/footer_links are
            # passed to launch()), where the show_*_button flags are replaced
            # by `buttons`; output components are expected to show the
            # download button by default — confirm against the installed
            # Gradio version.
            output_video = gr.Video(
                label="Output Video",
                height=400,
                autoplay=True
            )

            # Status message
            status = gr.Markdown("Ready to generate your video!", visible=True)

    # Examples section
    # With cache_examples=False nothing is precomputed at startup.
    gr.Markdown("### 💡 Examples")
    gr.Examples(
        examples=create_examples(),
        inputs=[input_image, input_prompt, seed, num_frames, num_inference_steps, fps],
        outputs=output_video,
        fn=generate_video,
        cache_examples=False,
        label="Try these examples"
    )

    # Instructions
    with gr.Accordion("📖 How to Use", open=False):
        gr.Markdown("""
        1. **Upload an Image**: Choose any image as the starting frame
        2. **Write a Prompt**: Describe the desired video content and motion
        3. **Adjust Settings**: Optionally modify seed, frames, and quality settings
        4. **Generate**: Click the button and wait for the magic to happen!
        
        **Tips**:
        - Use descriptive prompts with motion words (e.g., "flying", "dancing", "flowing")
        - Higher inference steps improve quality but take longer
        - The seed controls randomness - use the same seed for reproducible results
        - For best results, use clear, high-quality input images
        - This app uses the official Tencent HunyuanVideo-1.5 model
        """)

    # Event handler with loading states
    def generate_with_loading(image, prompt, seed_val, frames, steps, fps_val):
        """Run generate_video while streaming UI updates.

        Yields ``(video, button, status)`` tuples matching the ``outputs`` of
        the click handler below: first a "generating" status with the button
        disabled, then either the video path with a success message or an
        error message. The button is re-enabled in both final cases.
        """
        status_msg = "🔄 Generating video... This may take a few minutes."
        # Disable the button for the duration of the run so it cannot be
        # clicked again while a generation is in progress.
        yield gr.update(), gr.update(interactive=False), status_msg

        try:
            video_path = generate_video(image, prompt, seed_val, frames, steps, fps_val)
        except Exception as e:
            # Failures are reported in the status line; the video is left as is.
            error_msg = f"❌ Error: {str(e)}"
            yield gr.update(), gr.update(interactive=True), error_msg
        else:
            success_msg = "✅ Video generated successfully!"
            yield video_path, gr.update(interactive=True), success_msg

    generate_btn.click(
        fn=generate_with_loading,
        inputs=[input_image, input_prompt, seed, num_frames, num_inference_steps, fps],
        outputs=[output_video, generate_btn, status],
        show_progress="full"
    )

# Launch with Gradio 6 syntax
# Page-level CSS: cap the container width and centre it.
_container_css = """
    .gradio-container {
        max-width: 1200px !important;
        margin: auto !important;
    }
    """

# Extra footer entries pointing at the model card and at anycoder.
_footer_links = [
    {"label": "Official HunyuanVideo Model", "url": "https://huggingface.co/tencent/HunyuanVideo-1.5"},
    {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
]

demo.launch(theme=custom_theme, css=_container_css, footer_links=_footer_links)