"""Gradio demo: turn a still image plus a text prompt into a short video.

Generation is delegated to the diffusers HunyuanVideo-1.5 image-to-video
pipeline; the UI is built with Gradio Blocks.
"""

import gradio as gr
import torch
import spaces
import time
from diffusers import HunyuanVideo15ImageToVideoPipeline
from diffusers.utils import export_to_video, load_image
from PIL import Image
import os
import tempfile

# Model configuration: weights are requested in bfloat16, and the target
# device is the first CUDA device when torch reports one, otherwise the CPU.
dtype = torch.bfloat16
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Shared pipeline slot; stays None until load_model() fills it on first use.
pipe = None

def load_model():
    """Return the shared HunyuanVideo-1.5 image-to-video pipeline.

    The pipeline is created on the first call and stored in the module-level
    ``pipe``; every later call returns that same object without reloading.
    """
    global pipe
    if pipe is not None:
        return pipe

    # NOTE(review): confirm that this repo id / variant pair resolves to the
    # intended checkpoint in the installed diffusers version.
    pipe = HunyuanVideo15ImageToVideoPipeline.from_pretrained(
        "tencent/HunyuanVideo-1.5",
        torch_dtype=dtype,
        variant="480p_i2v_step_distilled",
    )
    # Turn on model CPU offload and VAE tiling before handing the pipeline out.
    pipe.enable_model_cpu_offload()
    pipe.vae.enable_tiling()
    return pipe

@spaces.GPU(duration=120)
def generate_video(image, prompt, seed=1, num_frames=121, num_inference_steps=50, fps=24):
    """
    Generate video from image and prompt using official HunyuanVideo-1.5

    Args:
        image: Start frame. A string is passed through ``load_image``; any
            other value is handed to the pipeline unchanged.
        prompt: Text description of the desired video; must be non-blank.
        seed: Seed for the torch generator (coerced to ``int``).
        num_frames: Number of frames requested from the pipeline
            (coerced to ``int``).
        num_inference_steps: Number of inference steps requested from the
            pipeline (coerced to ``int``).
        fps: Frame rate used when exporting the video (coerced to ``int``).

    Returns:
        Path of the temporary ``.mp4`` file the frames were exported to.

    Raises:
        gr.Error: If the image or prompt is missing, a numeric setting cannot
            be converted to an integer, or generation/export fails.
    """
    if image is None:
        raise gr.Error("Please upload an image first!")

    # The prompt can be None (not only ""), so test for that before .strip().
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt!")

    # UI widgets can deliver floats (or None for a cleared field), while
    # torch.Generator.manual_seed() only accepts an int; normalise up front so
    # the user gets a clear message instead of a failure deep in the pipeline.
    try:
        seed = int(seed)
        num_frames = int(num_frames)
        num_inference_steps = int(num_inference_steps)
        fps = int(fps)
    except (TypeError, ValueError) as exc:
        raise gr.Error(
            "Seed, number of frames, inference steps and FPS must be numeric values!"
        ) from exc

    try:
        # Load model
        pipe = load_model()

        # Create generator with seed
        generator = torch.Generator(device=device).manual_seed(seed)

        # Load and process image
        if isinstance(image, str):
            input_image = load_image(image)
        else:
            input_image = image

        # Generate video
        with torch.inference_mode():
            video_frames = pipe(
                prompt=prompt,
                image=input_image,
                generator=generator,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps,
            ).frames[0]

        # Reserve a named temp file; delete=False keeps it on disk after the
        # handle is closed so the exporter can write to that path.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            output_path = tmp_file.name

        # Export video
        export_to_video(video_frames, output_path, fps=fps)

        return output_path

    except gr.Error:
        # Already a user-facing error; do not wrap it a second time.
        raise
    except Exception as e:
        # Keep the original exception chained for server-side tracebacks.
        raise gr.Error(f"Error generating video: {str(e)}") from e
    finally:
        # Cleanup GPU memory
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

def create_examples():
    """Build the rows for the app's examples table.

    Every row has six entries: an image URL, a prompt, a seed, and the values
    121, 50 and 24. All rows share the same image URL; only the prompt and
    seed differ.
    """
    image_url = "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/wan_i2v_input.JPG"
    beach_cat_prompt = "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside."

    prompt_seed_pairs = (
        (beach_cat_prompt, 1),
        ("A majestic eagle soaring through mountain peaks at sunset", 42),
        ("Anime style, a girl with pink hair dancing in cherry blossom petals", 123),
    )
    return [
        [image_url, text, seed_value, 121, 50, 24]
        for text, seed_value in prompt_seed_pairs
    ]

# Custom theme: start from the Soft preset with the app's hues, font and
# sizing, then override the primary-button fills and block-title weight.
_soft_base_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg",
    spacing_size="lg",
    radius_size="md",
)
custom_theme = _soft_base_theme.set(
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    block_title_text_weight="600",
)

with gr.Blocks() as demo:
    # Header with "Built with anycoder" link
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 20px;">
        <h1>🎬 Image to Video Generator</h1>
        <p style="color: #666;">Transform static images into dynamic videos using Official HunyuanVideo-1.5</p>
        <p style="margin-top: 10px;">
            <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #0066cc; text-decoration: none;">
                Built with anycoder
            </a>
        </p>
    </div>
    """)
    
    with gr.Row(equal_height=True):
        # Left column: inputs and generation settings
        with gr.Column(scale=1):
            gr.Markdown("### 📸 Input Image")
            input_image = gr.Image(
                label="Upload Image",
                type="pil",
                height=300,
                sources=["upload", "webcam", "clipboard"]
            )
            
            gr.Markdown("### ✍️ Prompt")
            input_prompt = gr.Textbox(
                label="Describe the video you want to generate",
                placeholder="Describe the motion, style, and content...",
                lines=4,
                max_lines=6
            )
            
            with gr.Accordion("⚙️ Advanced Settings", open=False):
                # precision=0 makes the component hand the seed over as an
                # integer, which is what the torch generator expects.
                seed = gr.Number(
                    label="Seed",
                    value=1,
                    minimum=0,
                    maximum=999999,
                    step=1,
                    precision=0,
                    info="Random seed for reproducible results"
                )
                
                num_frames = gr.Slider(
                    label="Number of Frames",
                    minimum=49,
                    maximum=121,
                    value=121,
                    step=1,
                    info="Higher values = longer videos"
                )
                
                num_inference_steps = gr.Slider(
                    label="Inference Steps",
                    minimum=20,
                    maximum=100,
                    value=50,
                    step=1,
                    info="Higher values = better quality but slower"
                )
                
                fps = gr.Slider(
                    label="FPS",
                    minimum=12,
                    maximum=30,
                    value=24,
                    step=1,
                    info="Frames per second for output video"
                )
            
            generate_btn = gr.Button(
                "🎬 Generate Video",
                variant="primary",
                size="lg"
            )
            
        # Right column: result and status line
        with gr.Column(scale=1):
            gr.Markdown("### 🎥 Generated Video")
            # NOTE(review): confirm `show_download_button` is still accepted
            # by the installed Gradio version.
            output_video = gr.Video(
                label="Output Video",
                height=400,
                autoplay=True,
                show_download_button=True
            )
            
            # Status message
            status = gr.Markdown("Ready to generate your video!", visible=True)
    
    # Examples section
    gr.Markdown("### 💡 Examples")
    gr.Examples(
        examples=create_examples(),
        inputs=[input_image, input_prompt, seed, num_frames, num_inference_steps, fps],
        outputs=output_video,
        fn=generate_video,
        cache_examples=False,
        label="Try these examples"
    )
    
    # Instructions
    with gr.Accordion("📖 How to Use", open=False):
        gr.Markdown("""
        1. **Upload an Image**: Choose any image as the starting frame
        2. **Write a Prompt**: Describe the desired video content and motion
        3. **Adjust Settings**: Optionally modify seed, frames, and quality settings
        4. **Generate**: Click the button and wait for the magic to happen!
        
        **Tips**:
        - Use descriptive prompts with motion words (e.g., "flying", "dancing", "flowing")
        - Higher inference steps improve quality but take longer
        - The seed controls randomness - use the same seed for reproducible results
        - For best results, use clear, high-quality input images
        - This app uses the official Tencent HunyuanVideo-1.5 model
        """)
    
    # Event handler with loading states
    def generate_with_loading(image, prompt, seed_val, frames, steps, fps_val):
        """Run generate_video and stream UI updates around it.

        Yields ``(video, button, status)`` tuples matching the click handler's
        outputs: first a "generating" state with the button disabled, then
        either the video path with a success message or an error message. The
        button is re-enabled in both final states.
        """
        status_msg = "🔄 Generating video... This may take a few minutes."
        # Disable the button while a job is running to avoid duplicate submits.
        yield gr.update(), gr.update(interactive=False), status_msg
        
        try:
            video_path = generate_video(image, prompt, seed_val, frames, steps, fps_val)
        except Exception as e:
            # Report the failure in the status line instead of raising.
            error_msg = f"❌ Error: {str(e)}"
            yield gr.update(), gr.update(interactive=True), error_msg
        else:
            success_msg = "✅ Video generated successfully!"
            yield video_path, gr.update(interactive=True), success_msg
    
    generate_btn.click(
        fn=generate_with_loading,
        inputs=[input_image, input_prompt, seed, num_frames, num_inference_steps, fps],
        outputs=[output_video, generate_btn, status],
        show_progress="full"
    )

# Launch with Gradio 6 syntax: theme, CSS and footer links are all supplied
# to launch().
_page_css = """
    .gradio-container {
        max-width: 1200px !important;
        margin: auto !important;
    }
    """

_footer_links = [
    {"label": "Official HunyuanVideo Model", "url": "https://huggingface.co/tencent/HunyuanVideo-1.5"},
    {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
]

demo.launch(theme=custom_theme, css=_page_css, footer_links=_footer_links)