K00B404 commited on
Commit
8ac3922
·
verified ·
1 Parent(s): b0f83cf

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +687 -0
app.py ADDED
@@ -0,0 +1,687 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torch.nn.functional as F
4
+ from diffusers import StableDiffusionImg2ImgPipeline, DDIMScheduler
5
+ from PIL import Image
6
+ import numpy as np
7
+ from typing import List, Optional, Dict, Any
8
+ from collections import deque
9
+ import cv2
10
+ import os
11
+ import tempfile
12
+ import imageio
13
+ from datetime import datetime
14
+
15
class SimpleTemporalBuffer:
    """Rolling window of recent frames plus a scalar motion history.

    Whenever a frame is added and a previous frame exists, the displacement of
    the image-centre point between the two frames is estimated with pyramidal
    Lucas-Kanade optical flow, and its magnitude is appended to
    ``motion_vectors``.

    Attributes:
        buffer_size: Maximum number of frames retained.
        frames: Most recent frames, oldest first.
        frame_embeddings: Embeddings supplied together with frames (optional).
        motion_vectors: Centre-point displacement magnitudes between
            consecutive frames; holds at most ``buffer_size - 1`` entries.
    """

    def __init__(self, buffer_size: int = 6):
        """Create an empty buffer.

        Args:
            buffer_size: Number of frames to retain; must be at least 1.

        Raises:
            ValueError: If ``buffer_size`` is smaller than 1.
        """
        # Fail with a clear message instead of the opaque error that
        # ``deque(maxlen=-1)`` would raise for a size below 1.
        if buffer_size < 1:
            raise ValueError("buffer_size must be at least 1")
        self.buffer_size = buffer_size
        self.frames = deque(maxlen=buffer_size)
        self.frame_embeddings = deque(maxlen=buffer_size)
        self.motion_vectors = deque(maxlen=buffer_size - 1)

    @staticmethod
    def _center_motion(prev_frame: "Image.Image", curr_frame: "Image.Image") -> Optional[float]:
        """Return how far the centre point moved between two frames.

        Returns ``None`` when the tracker reports that the point was lost.
        """
        # Convert through PIL so RGBA, palette and greyscale frames work too;
        # cv2.cvtColor(..., COLOR_RGB2GRAY) only accepts 3-channel input.
        prev_gray = np.array(prev_frame.convert("L"))
        curr_gray = np.array(curr_frame.convert("L"))

        # A single tracked point, laid out as (N, 1, 2) float32.
        center = np.array(
            [[[curr_frame.width // 2, curr_frame.height // 2]]],
            dtype=np.float32,
        )
        next_pts, status, _err = cv2.calcOpticalFlowPyrLK(
            prev_gray, curr_gray, center, None
        )

        # A zero status means the flow for that point was not found, so the
        # returned coordinates are meaningless and must not be recorded.
        if next_pts is None or status is None or not status.ravel()[0]:
            return None

        displacement = next_pts.reshape(-1, 2)[0] - center.reshape(-1, 2)[0]
        return float(np.linalg.norm(displacement))

    def add_frame(self, frame: "Image.Image", embedding: Optional["torch.Tensor"] = None):
        """Append a frame, recording its motion relative to the previous one.

        Motion estimation is best effort: any failure is printed and the frame
        is still stored.

        Args:
            frame: The frame to store.
            embedding: Optional embedding stored alongside the frame.
        """
        if self.frames:
            try:
                magnitude = self._center_motion(self.frames[-1], frame)
            except Exception as e:
                # Deliberately broad: a motion failure must never block
                # frame generation.
                print(f"Motion calculation error: {e}")
            else:
                if magnitude is not None:
                    self.motion_vectors.append(magnitude)

        self.frames.append(frame)
        if embedding is not None:
            self.frame_embeddings.append(embedding)

    def get_reference_frame(self) -> Optional["Image.Image"]:
        """Return the most recently added frame, or ``None`` if empty."""
        return self.frames[-1] if self.frames else None

    def get_motion_context(self) -> Dict[str, Any]:
        """Summarise recent motion for the next generation step.

        Returns:
            A dict with the keys ``has_motion``, ``current_motion`` (mean of
            up to the last three magnitudes), ``predicted_motion`` (mean plus
            half the trend), ``motion_trend`` (last minus first of the recent
            magnitudes) and ``motion_history``. Without any recorded motion
            the numeric values are 0.0 and the history is empty.
        """
        if not self.motion_vectors:
            # Same key set as the motion case so callers can index directly.
            return {
                "has_motion": False,
                "current_motion": 0.0,
                "predicted_motion": 0.0,
                "motion_trend": 0.0,
                "motion_history": [],
            }

        # Simple prediction from the last three recorded magnitudes.
        recent_motion = list(self.motion_vectors)[-3:]
        avg_motion = float(np.mean(recent_motion))
        if len(recent_motion) > 1:
            motion_trend = float(recent_motion[-1] - recent_motion[0])
        else:
            motion_trend = 0.0

        return {
            "has_motion": True,
            "current_motion": avg_motion,
            "predicted_motion": avg_motion + motion_trend * 0.5,
            "motion_trend": motion_trend,
            "motion_history": recent_motion,
        }
76
+
77
class SD15FlexibleI2VGenerator:
    """Image-to-video generator that chains Stable Diffusion 1.5 img2img calls.

    Frames are produced one after another: each img2img call takes the
    previously generated frame as its input image. A ``SimpleTemporalBuffer``
    tracks recent frames so that denoising strength, prompt wording and
    frame blending can react to the measured motion.
    """

    def __init__(
        self,
        model_id: str = "runwayml/stable-diffusion-v1-5",
        device: str = "cuda" if torch.cuda.is_available() else "cpu"
    ):
        """Store the configuration; no weights are loaded here.

        Args:
            model_id: Checkpoint identifier passed to ``from_pretrained``.
            device: Torch device string the pipeline is moved to.
        """
        self.model_id = model_id
        self.device = device
        self.pipe = None
        self.temporal_buffer = SimpleTemporalBuffer()
        self.is_loaded = False

    def load_model(self):
        """Load the img2img pipeline and return a human-readable status.

        Returns:
            A status message; failures are reported in the message rather
            than raised.
        """
        if self.is_loaded:
            return "Model already loaded"

        try:
            print(f"🚀 Loading SD1.5 pipeline on {self.device}...")

            # Also matches indexed devices such as "cuda:0".
            on_cuda = str(self.device).startswith("cuda")

            # Build into a local so a failure never leaves a half-configured
            # pipeline on the instance.
            pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
                self.model_id,
                torch_dtype=torch.float16 if on_cuda else torch.float32,
                safety_checker=None,
                requires_safety_checker=False,
            )

            # Use DDIM for more consistent results
            pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
            pipe = pipe.to(self.device)

            # Enable memory efficient attention
            if on_cuda:
                pipe.enable_attention_slicing()
                try:
                    pipe.enable_xformers_memory_efficient_attention()
                except Exception:
                    print("⚠️ xformers not available, using standard attention")

            self.pipe = pipe
            self.is_loaded = True
            return "✅ Model loaded successfully!"

        except Exception as e:
            return f"❌ Error loading model: {str(e)}"

    def calculate_adaptive_strength(self, motion_context: Dict[str, Any], base_strength: float = 0.75) -> float:
        """Scale the denoising strength down as measured motion grows.

        Args:
            motion_context: Dict as produced by
                ``SimpleTemporalBuffer.get_motion_context``.
            base_strength: Strength used when there is no motion.

        Returns:
            ``base_strength`` unchanged when no motion is recorded; otherwise
            the strength reduced by up to 30% and clamped to [0.3, 0.9].
        """
        if not motion_context.get("has_motion", False):
            return base_strength

        motion = motion_context["current_motion"]

        # More motion = less strength (preserve more of previous frame)
        # Less motion = more strength (allow more change)
        motion_factor = np.clip(motion / 50.0, 0.0, 1.0)  # Normalize motion
        adaptive_strength = base_strength * (1.0 - motion_factor * 0.3)

        # Return a plain float rather than a NumPy scalar.
        return float(np.clip(adaptive_strength, 0.3, 0.9))

    def enhance_prompt_with_motion(self, base_prompt: str, motion_context: Dict[str, Any]) -> str:
        """Append a motion descriptor to the prompt based on measured motion.

        Args:
            base_prompt: The user prompt.
            motion_context: Dict as produced by
                ``SimpleTemporalBuffer.get_motion_context``.

        Returns:
            The prompt unchanged when no motion is recorded, otherwise the
            prompt followed by a descriptor chosen from the motion magnitude
            and trend.
        """
        if not motion_context.get("has_motion", False):
            return base_prompt

        motion = motion_context["current_motion"]
        trend = motion_context.get("motion_trend", 0)

        # Add motion descriptors based on analysis
        if motion > 30:
            if trend > 5:
                motion_desc = ", fast movement, dynamic motion, motion blur"
            else:
                motion_desc = ", steady movement, continuous motion"
        elif motion > 10:
            motion_desc = ", gentle movement, smooth transition"
        else:
            motion_desc = ", subtle movement, slight change"

        return base_prompt + motion_desc

    def blend_frames(self, current_frame: "Image.Image", reference_frame: "Image.Image", blend_ratio: float = 0.15) -> "Image.Image":
        """Linearly blend a frame with a reference for temporal consistency.

        Args:
            current_frame: The newly generated frame.
            reference_frame: The frame to mix in.
            blend_ratio: Weight of the reference frame (0 keeps only the
                current frame).

        Returns:
            The blended frame, with the size and mode of ``current_frame``.
        """
        # Align the reference with the current frame so the array arithmetic
        # below cannot fail on a shape mismatch.
        if reference_frame.mode != current_frame.mode:
            reference_frame = reference_frame.convert(current_frame.mode)
        if reference_frame.size != current_frame.size:
            reference_frame = reference_frame.resize(current_frame.size)

        current_array = np.array(current_frame, dtype=np.float32)
        reference_array = np.array(reference_frame, dtype=np.float32)

        # Blend frames
        blended_array = current_array * (1 - blend_ratio) + reference_array * blend_ratio
        blended_array = np.clip(blended_array, 0, 255).astype(np.uint8)

        return Image.fromarray(blended_array)

    @torch.no_grad()
    def generate_frame_batch(
        self,
        init_image: "Image.Image",
        prompt: str,
        num_frames: int = 1,
        strength: float = 0.75,
        guidance_scale: float = 7.5,
        num_inference_steps: int = 20,
        generator: Optional[torch.Generator] = None,
        progress_callback=None
    ) -> List["Image.Image"]:
        """Generate ``num_frames`` frames sequentially with img2img.

        Each generated frame is blended with the latest buffered frame,
        pushed into the temporal buffer, and used as the input image for
        the next iteration.

        Args:
            init_image: Input image for the first iteration.
            prompt: Base text prompt.
            num_frames: Number of frames to produce.
            strength: Base denoising strength before motion adaptation.
            guidance_scale: Guidance scale passed to the pipeline.
            num_inference_steps: Inference steps passed to the pipeline.
            generator: Optional torch generator passed to the pipeline.
            progress_callback: Optional callable receiving a status string.

        Returns:
            The generated frames in order.

        Raises:
            ValueError: If the model has not been loaded.
        """
        if not self.is_loaded:
            raise ValueError("Model not loaded. Please load the model first.")

        num_frames = int(num_frames)
        frames = []
        current_image = init_image

        for i in range(num_frames):
            if progress_callback:
                progress_callback(f"Generating frame {i+1}/{num_frames}")

            # Get motion context
            motion_context = self.temporal_buffer.get_motion_context()

            # Adaptive parameters based on motion
            adaptive_strength = self.calculate_adaptive_strength(motion_context, strength)
            enhanced_prompt = self.enhance_prompt_with_motion(prompt, motion_context)

            # Generate frame
            result = self.pipe(
                prompt=enhanced_prompt,
                image=current_image,
                strength=adaptive_strength,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                generator=generator,
            )

            generated_frame = result.images[0]

            # Apply temporal consistency blending; blend less when motion
            # is high.
            reference_frame = self.temporal_buffer.get_reference_frame()
            if reference_frame is not None:
                blend_ratio = 0.1 if motion_context.get("current_motion", 0) > 20 else 0.2
                generated_frame = self.blend_frames(generated_frame, reference_frame, blend_ratio)

            # Update buffer
            self.temporal_buffer.add_frame(generated_frame)
            frames.append(generated_frame)

            # Use generated frame as input for next iteration
            current_image = generated_frame

        return frames

    def generate_i2v_sequence(
        self,
        init_image: "Image.Image",
        prompt: str,
        total_frames: int = 16,
        frames_per_batch: int = 2,
        strength: float = 0.75,
        guidance_scale: float = 7.5,
        num_inference_steps: int = 20,
        seed: Optional[int] = None,
        progress_callback=None
    ) -> List["Image.Image"]:
        """Generate a frame sequence in batches of ``frames_per_batch``.

        The returned list starts with ``init_image`` itself, so it contains
        ``total_frames`` entries including the initial frame.

        Args:
            init_image: First frame of the sequence.
            prompt: Base text prompt.
            total_frames: Target length of the sequence, initial frame
                included.
            frames_per_batch: Frames requested per ``generate_frame_batch``
                call; must be at least 1.
            strength: Base denoising strength.
            guidance_scale: Guidance scale passed to the pipeline.
            num_inference_steps: Inference steps passed to the pipeline.
            seed: Seed for reproducible output; ``None`` picks a fresh
                random seed.
            progress_callback: Optional callable receiving a status string.

        Returns:
            All frames in order, beginning with ``init_image``.

        Raises:
            ValueError: If the model is not loaded or ``frames_per_batch``
                is smaller than 1.
        """
        if not self.is_loaded:
            raise ValueError("Model not loaded. Please load the model first.")

        total_frames = int(total_frames)
        frames_per_batch = int(frames_per_batch)
        if frames_per_batch < 1:
            raise ValueError("frames_per_batch must be at least 1")

        # Setup generator. A new torch.Generator starts from a fixed default
        # seed, so it must be re-seeded explicitly to get varying output
        # when the caller did not ask for a specific seed.
        generator = torch.Generator(device=self.device)
        if seed is not None:
            # UI widgets may deliver the seed as a float; manual_seed
            # needs an int.
            generator.manual_seed(int(seed))
        else:
            generator.seed()

        # Reset temporal buffer and add initial frame
        self.temporal_buffer = SimpleTemporalBuffer()
        self.temporal_buffer.add_frame(init_image)

        all_frames = [init_image]  # Start with initial frame
        frames_generated = 1
        current_reference = init_image

        # Generate in batches
        while frames_generated < total_frames:
            remaining_frames = total_frames - frames_generated
            current_batch_size = min(frames_per_batch, remaining_frames)

            if progress_callback:
                progress_callback(f"Batch: Generating frames {frames_generated+1}-{frames_generated+current_batch_size}")

            # Generate batch
            batch_frames = self.generate_frame_batch(
                init_image=current_reference,
                prompt=prompt,
                num_frames=current_batch_size,
                strength=strength,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                generator=generator,
                progress_callback=progress_callback,
            )

            # Add to results
            all_frames.extend(batch_frames)
            frames_generated += current_batch_size

            # Update reference for next batch
            current_reference = batch_frames[-1]

        return all_frames
286
+
287
# Module-level generator instance used by the Gradio callbacks in this file.
# Constructing it does not load any weights: the pipeline stays None until
# load_model() is called.
generator = SD15FlexibleI2VGenerator()
289
+
290
def load_model_interface():
    """Load the shared generator's pipeline and return its status message."""
    return generator.load_model()
294
+
295
def create_frames_to_gif(frames: List["Image.Image"], duration: int = 200) -> str:
    """Write the frames to an endlessly looping GIF and return its path.

    The file is placed in a fresh temporary directory that is intentionally
    not removed here, because the caller hands the path on for download.

    Args:
        frames: Frames in playback order; must not be empty.
        duration: Display time per frame in milliseconds.

    Returns:
        Path of the written GIF file.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    if not frames:
        raise ValueError("Cannot create a GIF from an empty frame list")

    temp_dir = tempfile.mkdtemp()
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    gif_path = os.path.join(temp_dir, f"i2v_sequence_{timestamp}.gif")

    first_frame, *remaining_frames = frames
    first_frame.save(
        gif_path,
        save_all=True,
        append_images=remaining_frames,
        duration=duration,
        loop=0,  # 0 = loop forever
    )

    return gif_path
310
+
311
def create_frames_to_video(frames: List["Image.Image"], fps: int = 8) -> str:
    """Write the frames to an MP4 file and return its path.

    If the MP4 writer cannot be created, the frames are written as a GIF
    instead and the GIF path is returned.

    Args:
        frames: Frames in playback order; must not be empty.
        fps: Frames per second; must be positive.

    Returns:
        Path of the written MP4 file, or of the fallback GIF.

    Raises:
        ValueError: If ``frames`` is empty or ``fps`` is not positive.
    """
    if not frames:
        raise ValueError("Cannot create a video from an empty frame list")
    if fps <= 0:
        # Also protects the 1000 / fps division in the GIF fallback.
        raise ValueError("fps must be positive")

    temp_dir = tempfile.mkdtemp()
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    video_path = os.path.join(temp_dir, f"i2v_sequence_{timestamp}.mp4")

    try:
        with imageio.get_writer(video_path, fps=fps) as writer:
            for frame in frames:
                writer.append_data(np.array(frame))
    except (ImportError, ValueError) as exc:
        # imageio itself is imported at module level, so this fallback is
        # for a missing MP4 backend (e.g. the ffmpeg plugin).
        # NOTE(review): presumably newer imageio versions report a missing
        # backend as ValueError rather than ImportError - confirm against
        # the installed version.
        print(f"MP4 writer unavailable ({exc}); falling back to GIF")
        import shutil
        shutil.rmtree(temp_dir, ignore_errors=True)  # drop the unused dir
        return create_frames_to_gif(frames, duration=int(1000 / fps))

    return video_path
325
+
326
def generate_i2v_interface(
    init_image,
    prompt,
    total_frames,
    frames_per_batch,
    strength,
    guidance_scale,
    num_inference_steps,
    seed,
    output_format,
    progress=gr.Progress()
):
    """Gradio callback for fixed-batch image-to-video generation.

    Args:
        init_image: Uploaded PIL image, or ``None`` if nothing was uploaded.
        prompt: Text prompt.
        total_frames: Sequence length including the initial frame.
        frames_per_batch: Frames requested per generation batch.
        strength: Base denoising strength.
        guidance_scale: Guidance scale for the pipeline.
        num_inference_steps: Inference steps per frame.
        seed: Seed; a value <= 0 or an empty field means "no fixed seed".
        output_format: ``"GIF"`` for a GIF; any other value produces MP4.
        progress: Gradio progress tracker.

    Returns:
        ``(last_frame, output_path, status_message)``; the first two are
        ``None`` when validation or generation fails.
    """
    if init_image is None:
        return None, None, "❌ Please upload an initial image"

    # Guard against None as well as whitespace-only prompts.
    if not prompt or not prompt.strip():
        return None, None, "❌ Please enter a prompt"

    try:
        # Numeric widgets may deliver floats; the generation code needs ints.
        total_frames = int(total_frames)
        frames_per_batch = int(frames_per_batch)
        num_inference_steps = int(num_inference_steps)
        # A cleared number field arrives as None; torch seeds must be ints.
        seed_value = int(seed) if seed is not None and seed > 0 else None

        # The generator reports text only, so the bar is held at the
        # midpoint while frames are being produced.
        def update_progress(message):
            progress(0.5, desc=message)

        progress(0.1, desc="Starting generation...")

        # Resize image to 512x512 if needed
        if init_image.size != (512, 512):
            init_image = init_image.resize((512, 512), Image.Resampling.LANCZOS)

        # Generate frames
        frames = generator.generate_i2v_sequence(
            init_image=init_image,
            prompt=prompt,
            total_frames=total_frames,
            frames_per_batch=frames_per_batch,
            strength=strength,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            seed=seed_value,
            progress_callback=update_progress,
        )

        progress(0.8, desc="Creating output file...")

        # Create output file
        if output_format == "GIF":
            output_path = create_frames_to_gif(frames, duration=200)
        else:  # MP4
            output_path = create_frames_to_video(frames, fps=8)

        progress(1.0, desc="Complete!")

        # Return last frame as preview and the output file
        return frames[-1], output_path, f"✅ Generated {len(frames)} frames successfully!"

    except Exception as e:
        # UI boundary: report the failure in the status box.
        return None, None, f"❌ Error: {str(e)}"
385
+
386
def _parse_batch_pattern(pattern_str):
    """Parse a comma-separated string such as "1,2,3" into positive ints.

    Empty items (for example from a trailing comma) are ignored.

    Raises:
        ValueError: If no sizes remain or any item is not a positive integer.
    """
    tokens = [token.strip() for token in (pattern_str or "").split(",")]
    try:
        pattern = [int(token) for token in tokens if token]
    except ValueError:
        raise ValueError("Invalid batch pattern") from None
    if not pattern or any(size <= 0 for size in pattern):
        raise ValueError("Invalid batch pattern")
    return pattern


def generate_variable_pattern_interface(
    init_image,
    prompt,
    total_frames,
    batch_pattern_str,
    strength,
    guidance_scale,
    num_inference_steps,
    seed,
    output_format,
    progress=gr.Progress()
):
    """Gradio callback for generation with a cycling batch-size pattern.

    The batch size of each generation step is taken from the pattern in
    order, wrapping around when the pattern is exhausted.

    Args:
        init_image: Uploaded PIL image, or ``None`` if nothing was uploaded.
        prompt: Text prompt.
        total_frames: Sequence length including the initial frame.
        batch_pattern_str: Comma-separated positive batch sizes.
        strength: Base denoising strength.
        guidance_scale: Guidance scale for the pipeline.
        num_inference_steps: Inference steps per frame.
        seed: Seed; a value <= 0 or an empty field means "random".
        output_format: ``"GIF"`` for a GIF; any other value produces MP4.
        progress: Gradio progress tracker.

    Returns:
        ``(last_frame, output_path, status_message)``; the first two are
        ``None`` when validation or generation fails.
    """
    if init_image is None:
        return None, None, "❌ Please upload an initial image"

    # Guard against None as well as whitespace-only prompts.
    if not prompt or not prompt.strip():
        return None, None, "❌ Please enter a prompt"

    try:
        # Parse batch pattern
        batch_pattern = _parse_batch_pattern(batch_pattern_str)

        # Numeric widgets may deliver floats; the generation code needs ints.
        total_frames = int(total_frames)
        num_inference_steps = int(num_inference_steps)

        progress(0.1, desc="Starting variable pattern generation...")

        # Resize image
        if init_image.size != (512, 512):
            init_image = init_image.resize((512, 512), Image.Resampling.LANCZOS)

        # Generate with variable pattern
        frames = [init_image]
        frames_generated = 1
        current_reference = init_image
        pattern_idx = 0

        generator.temporal_buffer = SimpleTemporalBuffer()
        generator.temporal_buffer.add_frame(init_image)

        # A new torch.Generator starts from a fixed default seed, so it is
        # re-seeded explicitly to make the "random" setting vary per run.
        gen = torch.Generator(device=generator.device)
        if seed is not None and seed > 0:
            gen.manual_seed(int(seed))  # manual_seed needs an int
        else:
            gen.seed()

        while frames_generated < total_frames:
            current_batch_size = batch_pattern[pattern_idx % len(batch_pattern)]
            remaining_frames = total_frames - frames_generated
            actual_batch_size = min(current_batch_size, remaining_frames)

            progress(frames_generated / total_frames,
                     desc=f"Pattern step {pattern_idx+1}: {actual_batch_size} frames")

            batch_frames = generator.generate_frame_batch(
                init_image=current_reference,
                prompt=prompt,
                num_frames=actual_batch_size,
                strength=strength,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                generator=gen,
            )

            frames.extend(batch_frames)
            frames_generated += actual_batch_size
            current_reference = batch_frames[-1]
            pattern_idx += 1

        progress(0.9, desc="Creating output file...")

        # Create output. Every batch is capped at the remaining count, so
        # the list already holds the initial frame plus the generated ones
        # and needs no trimming.
        final_frames = frames
        if output_format == "GIF":
            output_path = create_frames_to_gif(final_frames, duration=200)
        else:
            output_path = create_frames_to_video(final_frames, fps=8)

        progress(1.0, desc="Complete!")

        return final_frames[-1], output_path, f"✅ Generated {len(final_frames)} frames with pattern {batch_pattern}!"

    except Exception as e:
        # UI boundary: report the failure in the status box.
        return None, None, f"❌ Error: {str(e)}"
469
+
470
+ # Create Gradio interface
471
def create_gradio_app():
    """Build the Gradio Blocks UI and return it (without launching).

    The layout has a model-loading row, a "Fixed Batch Generation" tab wired
    to ``generate_i2v_interface``, a "Variable Pattern Generation" tab wired
    to ``generate_variable_pattern_interface``, and static help sections.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    with gr.Blocks(title="SD1.5 Flexible I2V Generator", theme=gr.themes.Soft()) as app:

        gr.Markdown("""
        # 🎬 SD1.5 Flexible I2V Generator

        Generate image-to-video sequences with **flexible batch processing** and **temporal consistency**!

        ## Key Features:
        - 🎯 **Flexible Batch Sizes**: Generate 1, 2, 3+ frames at a time
        - 🔄 **Motion-Aware Processing**: Adapts based on detected motion
        - 🎨 **Temporal Consistency**: Smooth transitions between frames
        - 📈 **Variable Patterns**: Dynamic batch sizing patterns
        """)

        # Model loading section
        with gr.Row():
            load_btn = gr.Button("🚀 Load SD1.5 Model", variant="primary", size="lg")
            model_status = gr.Textbox(
                label="Model Status",
                value="Model not loaded. Click 'Load SD1.5 Model' to start.",
                interactive=False,
            )

        load_btn.click(load_model_interface, outputs=model_status)

        # Main interface tabs
        with gr.Tabs():

            # Fixed batch size tab
            with gr.Tab("🎯 Fixed Batch Generation"):
                with gr.Row():
                    with gr.Column(scale=1):
                        init_image_1 = gr.Image(
                            label="Initial Image",
                            type="pil",
                            height=300,
                        )
                        prompt_1 = gr.Textbox(
                            label="Prompt",
                            placeholder="e.g., a cat walking through a peaceful garden, cinematic lighting",
                            lines=3,
                        )

                        with gr.Row():
                            total_frames_1 = gr.Slider(
                                label="Total Frames",
                                minimum=4,
                                maximum=32,
                                value=12,
                                step=1,
                            )
                            frames_per_batch_1 = gr.Slider(
                                label="Frames per Batch (Key Parameter!)",
                                minimum=1,
                                maximum=4,
                                value=2,
                                step=1,
                            )

                        with gr.Accordion("Advanced Settings", open=False):
                            strength_1 = gr.Slider(
                                label="Strength",
                                minimum=0.3,
                                maximum=0.9,
                                value=0.75,
                                step=0.05,
                            )
                            guidance_scale_1 = gr.Slider(
                                label="Guidance Scale",
                                minimum=3.0,
                                maximum=15.0,
                                value=7.5,
                                step=0.5,
                            )
                            num_inference_steps_1 = gr.Slider(
                                label="Inference Steps",
                                minimum=10,
                                maximum=50,
                                value=20,
                                step=5,
                            )
                            # precision=0 makes the component deliver an int;
                            # torch seeds must be integers.
                            seed_1 = gr.Number(
                                label="Seed (-1 for random)",
                                value=-1,
                                precision=0,
                            )
                            output_format_1 = gr.Radio(
                                label="Output Format",
                                choices=["GIF", "MP4"],
                                value="GIF",
                            )

                        generate_btn_1 = gr.Button("🎬 Generate I2V Sequence", variant="primary", size="lg")

                    with gr.Column(scale=1):
                        preview_1 = gr.Image(label="Last Frame Preview", height=300)
                        output_file_1 = gr.File(label="Download Generated Video/GIF")
                        status_1 = gr.Textbox(label="Status", interactive=False)

                generate_btn_1.click(
                    generate_i2v_interface,
                    inputs=[
                        init_image_1, prompt_1, total_frames_1, frames_per_batch_1,
                        strength_1, guidance_scale_1, num_inference_steps_1, seed_1, output_format_1
                    ],
                    outputs=[preview_1, output_file_1, status_1],
                )

            # Variable pattern tab
            with gr.Tab("📈 Variable Pattern Generation"):
                with gr.Row():
                    with gr.Column(scale=1):
                        init_image_2 = gr.Image(
                            label="Initial Image",
                            type="pil",
                            height=300,
                        )
                        prompt_2 = gr.Textbox(
                            label="Prompt",
                            placeholder="e.g., smooth camera movement through a scene",
                            lines=3,
                        )

                        total_frames_2 = gr.Slider(
                            label="Total Frames",
                            minimum=6,
                            maximum=40,
                            value=16,
                            step=1,
                        )

                        batch_pattern_2 = gr.Textbox(
                            label="Batch Pattern (comma-separated)",
                            value="1,2,3,2,1",
                            placeholder="e.g., 1,2,3,2,1 or 2,4,2",
                        )

                        gr.Markdown("""
                        **Pattern Examples:**
                        - `1,2,3,2,1` - Start slow, ramp up, slow down
                        - `2,2,2,2` - Consistent 2-frame batches
                        - `1,3,1,3` - Alternating single and triple
                        """)

                        with gr.Accordion("Advanced Settings", open=False):
                            strength_2 = gr.Slider(label="Strength", minimum=0.3, maximum=0.9, value=0.75, step=0.05)
                            guidance_scale_2 = gr.Slider(label="Guidance Scale", minimum=3.0, maximum=15.0, value=7.5, step=0.5)
                            num_inference_steps_2 = gr.Slider(label="Inference Steps", minimum=10, maximum=50, value=20, step=5)
                            # precision=0 makes the component deliver an int.
                            seed_2 = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)
                            output_format_2 = gr.Radio(label="Output Format", choices=["GIF", "MP4"], value="GIF")

                        generate_btn_2 = gr.Button("🎨 Generate with Pattern", variant="primary", size="lg")

                    with gr.Column(scale=1):
                        preview_2 = gr.Image(label="Last Frame Preview", height=300)
                        output_file_2 = gr.File(label="Download Generated Video/GIF")
                        status_2 = gr.Textbox(label="Status", interactive=False)

                generate_btn_2.click(
                    generate_variable_pattern_interface,
                    inputs=[
                        init_image_2, prompt_2, total_frames_2, batch_pattern_2,
                        strength_2, guidance_scale_2, num_inference_steps_2, seed_2, output_format_2
                    ],
                    outputs=[preview_2, output_file_2, status_2],
                )

        # Examples section
        with gr.Accordion("📝 Example Prompts & Tips", open=False):
            gr.Markdown("""
            ## 🎯 Good Prompts for I2V:
            - `a peaceful lake with gentle ripples, soft sunlight, cinematic`
            - `a cat slowly walking through a garden, smooth movement`
            - `camera slowly panning across a mountain landscape`
            - `a flower blooming in timelapse, natural lighting`
            - `gentle waves on a beach, golden hour lighting`

            ## 🛠 Parameter Tips:
            - **Frames per Batch**:
              - `1` = Maximum consistency, slower generation
              - `2-3` = Balanced quality and speed
              - `4+` = Faster but less consistent
            - **Strength**:
              - `0.6-0.7` = Subtle changes
              - `0.7-0.8` = Moderate animation
              - `0.8-0.9` = More dramatic changes
            - **Batch Patterns**:
              - Use `1,2,3,2,1` for organic acceleration/deceleration
              - Use consistent values like `2,2,2` for steady pacing
            """)

        gr.Markdown("""
        ---

        ## 🚀 **Innovation Highlights:**

        This app demonstrates **flexible batch processing** for I2V generation:
        - Generate multiple frames simultaneously with `frames_per_batch`
        - Motion-aware strength adaptation based on optical flow
        - Temporal consistency through intelligent frame blending
        - Variable stepping patterns for dynamic control

        **Built with SD1.5 img2img pipeline + custom temporal processing!**
        """)

    return app
679
+
680
if __name__ == "__main__":
    # Serve on all interfaces at port 7860, without a public share link and
    # with Gradio's debug mode switched on.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    app = create_gradio_app()
    app.launch(**launch_options)