DawnC committed on
Commit
a0d6949
Β·
verified Β·
1 Parent(s): 3b2f252

Upload 9 files

Browse files
Files changed (9) hide show
  1. FlowFacade.py +156 -0
  2. ResourceManager.py +174 -0
  3. TextProcessor.py +199 -0
  4. VideoEngine.py +415 -0
  5. app.py +146 -0
  6. css_style.py +279 -0
  7. prompt_examples.py +93 -0
  8. requirements.txt +31 -0
  9. ui_manager.py +274 -0
FlowFacade.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import numpy as np
4
+ from PIL import Image
5
+ from typing import Tuple, Optional
6
+ from VideoEngine import VideoEngine
7
+ from TextProcessor import TextProcessor
8
+
9
# Optional dependency: the `spaces` package only exists on Hugging Face Spaces
# hardware. When it is absent, install a no-op stand-in so the rest of the
# module can apply @spaces.GPU(...) unconditionally.
HAS_SPACES = True
try:
    import spaces
except ImportError:
    HAS_SPACES = False

    class spaces:
        """Stand-in mirroring the `spaces.GPU` decorator factory off-Spaces."""

        @staticmethod
        def GPU(duration=120):
            def decorator(func):
                # No GPU scheduling outside Spaces: return the function as-is.
                return func
            return decorator
20
+
21
+
22
class FlowFacade:
    """Facade tying together the video engine and the prompt-expansion LLM.

    Exposes one high-level entry point (generate_video_from_image) plus small
    helpers for GPU-time estimation, input validation and cleanup.
    """

    def __init__(self):
        # SPACE_ID is injected by the Hugging Face Spaces runtime.
        self.is_spaces = os.environ.get('SPACE_ID') is not None
        self.video_engine = VideoEngine()
        self.text_processor = TextProcessor(resource_manager=None)
        print("βœ“ DeltaFlow initialized")

    def _calculate_gpu_duration(self, image: Image.Image, duration_seconds: float,
                                num_inference_steps: int, enable_prompt_expansion: bool) -> int:
        """Estimate GPU seconds to reserve for one generation request.

        Scales a per-step baseline (15s at 81 frames of 832x624) by the
        requested workload, super-linearly (** 1.5) since bigger jobs
        degrade worse than linearly.
        """
        BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624  # workload the baseline timing was measured at
        BASE_STEP_DURATION = 15  # seconds per inference step at the baseline workload

        resized_image = self.video_engine.resize_image(image)
        width, height = resized_image.width, resized_image.height
        frames = self.video_engine.get_num_frames(duration_seconds)

        factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
        step_duration = BASE_STEP_DURATION * factor ** 1.5
        total_duration = 10 + int(num_inference_steps) * step_duration  # +10s fixed overhead

        if enable_prompt_expansion:
            total_duration += 30  # budget for loading/running the prompt LLM

        return int(total_duration)

    @spaces.GPU(duration=120)
    def generate_video_from_image(self, image: Image.Image, user_instruction: str,
                                  duration_seconds: float = 3.0, num_inference_steps: int = 4,
                                  guidance_scale: float = 1.0, guidance_scale_2: float = 1.0,
                                  seed: int = 42, randomize_seed: bool = False,
                                  enable_prompt_expansion: bool = False,
                                  progress=None) -> Tuple[str, str, int]:
        """Generate a video from an image plus a motion instruction.

        Returns:
            Tuple[str, str, int]: (video file path, final prompt used, seed used).

        Raises:
            ValueError: if the image or instruction is missing.
            RuntimeError: if generation fails for any other reason.
        """
        if image is None:
            raise ValueError("No image provided")
        if not user_instruction or user_instruction.strip() == "":
            raise ValueError("Please provide a motion instruction")

        try:
            # BUGFIX: import gc unconditionally. It was previously imported only
            # inside the "model not loaded" branch, so the later gc.collect()
            # call risked a NameError when the model was already loaded.
            import gc

            if randomize_seed:
                seed = np.random.randint(0, 2147483647)

            if enable_prompt_expansion:
                if progress:
                    progress(0.1, desc="AI expanding your prompt...")
                # auto_unload frees the LLM before the video model loads.
                final_prompt = self.text_processor.process(user_instruction, auto_unload=True)
            else:
                final_prompt = user_instruction

            if progress:
                progress(0.2, desc="Preparing GPU memory...")

            if not self.video_engine.is_loaded:
                # Scrub as much GPU memory as possible before the big load.
                gc.collect()
                torch.cuda.empty_cache()
                torch.cuda.ipc_collect()

                if progress:
                    progress(0.25, desc="Loading video generation model...")
                self.video_engine.load_model()

                gc.collect()
                torch.cuda.empty_cache()

            if progress:
                progress(0.3, desc=f"Generating video ({num_inference_steps} steps)...")

            video_path = self.video_engine.generate_video(
                image=image, prompt=final_prompt, duration_seconds=duration_seconds,
                num_inference_steps=num_inference_steps, guidance_scale=guidance_scale,
                guidance_scale_2=guidance_scale_2, seed=seed
            )

            if progress:
                progress(1.0, desc="Complete!")

            return video_path, final_prompt, seed

        except Exception as e:
            import traceback
            print(f"\nβœ— Generation error: {type(e).__name__}: {str(e)}")
            if os.environ.get('DEBUG'):
                print(traceback.format_exc())
            # Chain the original exception so the root cause survives re-raising.
            raise RuntimeError(f"Generation failed: {type(e).__name__}: {str(e)}") from e

    def cleanup(self) -> None:
        """Best-effort release of the text-processor model and CUDA cache."""
        try:
            if hasattr(self.text_processor, 'is_loaded') and self.text_processor.is_loaded:
                self.text_processor.unload_model()
            torch.cuda.empty_cache()
        except Exception as e:
            # Cleanup must never raise; surface problems only in DEBUG mode.
            if os.environ.get('DEBUG'):
                print(f"⚠ Cleanup warning: {str(e)}")

    def get_system_info(self) -> dict:
        """Report device, model IDs and the quantization scheme in use."""
        quantization_type = "None"
        if torch.cuda.is_available():
            cuda_cap = torch.cuda.get_device_capability()
            # FP8 requires compute capability >= 8.9 (Ada/Hopper).
            fp8_supported = cuda_cap[0] > 8 or (cuda_cap[0] == 8 and cuda_cap[1] >= 9)
            quantization_type = "FP8" if fp8_supported else "INT8"

        return {
            "device": self.video_engine.device,
            "video_model": VideoEngine.MODEL_ID,
            "text_model": TextProcessor.MODEL_ID,
            "lightning_lora": "Enabled",
            "quantization": quantization_type,
            "optimizations": [
                "Lightning LoRA (4-8 steps)",
                f"{quantization_type} Quantization",
                "AoT Compilation (if available)"
            ]
        }

    def validate_image(self, image: Image.Image) -> bool:
        """Return True when an image exists and each side is within [256, 4096] px."""
        if image is None:
            return False

        min_dim, max_dim = 256, 4096

        if image.width < min_dim or image.height < min_dim:
            print(f"⚠ Image too small: {image.width}x{image.height}")
            return False

        if image.width > max_dim or image.height > max_dim:
            print(f"⚠ Image too large: {image.width}x{image.height}")
            return False

        return True

    def __del__(self):
        # Bare except on purpose: __del__ may run during interpreter shutdown
        # when parts of the runtime are already torn down.
        try:
            self.cleanup()
        except:
            pass
ResourceManager.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile ResourceManager.py
2
+ """
3
+ DeltaFlow - Resource Manager
4
+ Handles GPU memory allocation, deallocation, and cache management
5
+ """
6
+
7
+ import gc
8
+ import torch
9
+ from typing import Optional
10
+
11
+
12
class ResourceManager:
    """Tracks model instances and centralizes GPU/CPU memory housekeeping.

    Keeps a name->instance registry so models can be unloaded as a group,
    and wraps the CUDA cache-clearing incantations in one place.
    """

    def __init__(self):
        """Detect the compute device and start with an empty registry."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.current_model = None
        self.model_registry = {}

    def get_device(self) -> str:
        """Return the active compute device, either 'cuda' or 'cpu'."""
        return self.device

    def register_model(self, model_name: str, model_instance: object) -> None:
        """Record *model_instance* under *model_name* and mark it current."""
        self.model_registry[model_name] = model_instance
        self.current_model = model_name
        print(f"βœ“ Model registered: {model_name}")

    def unregister_model(self, model_name: str) -> None:
        """Drop *model_name* from the registry; no-op if it is not registered."""
        if model_name not in self.model_registry:
            return
        del self.model_registry[model_name]
        if self.current_model == model_name:
            self.current_model = None
        print(f"βœ“ Model unregistered: {model_name}")

    def clear_cache(self, aggressive: bool = False) -> None:
        """Free cached memory.

        On CUDA this empties the allocator cache; *aggressive* additionally
        runs the GC and synchronizes the device. On CPU it just collects.
        """
        if self.device != "cuda":
            gc.collect()
            print("βœ“ CPU memory garbage collected")
            return

        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

        if aggressive:
            # Extra passes for critical memory situations.
            gc.collect()
            with torch.cuda.device(self.device):
                torch.cuda.empty_cache()
                torch.cuda.synchronize()

        print(f"βœ“ CUDA cache cleared (aggressive={aggressive})")

    def cleanup_model(self, model_instance: Optional[object] = None) -> None:
        """Move model(s) to CPU, drop references, and aggressively clear caches.

        With no argument, every registered model is cleaned up; otherwise only
        the supplied instance is handled. NOTE(review): `del` here removes
        only the local reference — callers must drop their own references too.
        """
        if model_instance is not None:
            if hasattr(model_instance, 'to'):
                model_instance.to('cpu')
            del model_instance
        else:
            for registered_name, registered_model in list(self.model_registry.items()):
                if hasattr(registered_model, 'to'):
                    registered_model.to('cpu')
                del registered_model
                self.unregister_model(registered_name)

        gc.collect()
        self.clear_cache(aggressive=True)
        print("βœ“ Model cleanup completed")

    def get_memory_stats(self) -> dict:
        """Return GPU memory usage in GB; all zeros when running on CPU."""
        if self.device != "cuda" or not torch.cuda.is_available():
            return {
                "allocated_gb": 0,
                "reserved_gb": 0,
                "total_gb": 0,
                "free_gb": 0
            }

        gib = 1024 ** 3
        used = torch.cuda.memory_allocated() / gib
        held = torch.cuda.memory_reserved() / gib
        capacity = torch.cuda.get_device_properties(0).total_memory / gib
        # "free" here means total minus allocated, not driver-reported free.
        return {
            "allocated_gb": round(used, 2),
            "reserved_gb": round(held, 2),
            "total_gb": round(capacity, 2),
            "free_gb": round(capacity - used, 2)
        }

    def ensure_memory_available(self, required_gb: float = 2.0) -> bool:
        """Return True when at least *required_gb* of GPU memory is free.

        Attempts one aggressive cache clear before giving up.
        """
        available = self.get_memory_stats()["free_gb"]
        if available < required_gb:
            print(f"⚠ Low memory: {available:.2f}GB available, {required_gb:.2f}GB required")
            self.clear_cache(aggressive=True)
            available = self.get_memory_stats()["free_gb"]
        return available >= required_gb

    def switch_model_context(self, from_model: str, to_model: str) -> None:
        """Unload *from_model*, scrub caches, and mark *to_model* as current."""
        print(f"β†’ Switching context: {from_model} β†’ {to_model}")
        self.unregister_model(from_model)
        self.clear_cache(aggressive=True)
        self.current_model = to_model
        print(f"βœ“ Context switched to {to_model}")
TextProcessor.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile TextProcessor.py
2
+ """
3
+ DeltaFlow - Text Processor
4
+ Handles semantic expansion using Qwen2.5-0.5B-Instruct
5
+ Converts brief instructions into detailed motion descriptions
6
+ """
7
+
8
+ import gc
9
+ import traceback
10
+ from typing import Optional
11
+
12
+ import torch
13
+ import ftfy
14
+ import sentencepiece
15
+ from transformers import AutoModelForCausalLM, AutoTokenizer
16
+
17
+
18
class TextProcessor:
    """Prompt-expansion engine backed by Qwen2.5-0.5B-Instruct.

    Turns terse user instructions into motion-rich prompts suitable for the
    video generation pipeline, with optional load/unload lifecycle management.
    """

    MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
    MAX_OUTPUT_LENGTH = 100  # generation token cap, roughly 50 words

    def __init__(self, resource_manager: Optional[object] = None):
        """Pick the compute device, delegating to *resource_manager* if given."""
        self.resource_manager = resource_manager

        if resource_manager is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = resource_manager.get_device()

        self.model: Optional[AutoModelForCausalLM] = None
        self.tokenizer: Optional[AutoTokenizer] = None
        self.is_loaded = False

    def load_model(self) -> None:
        """Fetch the Qwen tokenizer and model; no-op when already loaded."""
        if self.is_loaded:
            print("⚠ TextProcessor already loaded, skipping...")
            return

        try:
            print("β†’ Loading Qwen2.5-0.5B-Instruct...")

            self.tokenizer = AutoTokenizer.from_pretrained(
                self.MODEL_ID,
                trust_remote_code=True,
            )
            self.model = AutoModelForCausalLM.from_pretrained(
                self.MODEL_ID,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                trust_remote_code=True,
            )

            if self.resource_manager is not None:
                self.resource_manager.register_model("TextProcessor", self.model)

            self.is_loaded = True
            print("βœ“ TextProcessor loaded successfully")

        except Exception as e:
            print(f"βœ— Error loading TextProcessor: {str(e)}")
            raise

    def unload_model(self) -> None:
        """Release model/tokenizer references and reclaim memory."""
        if not self.is_loaded:
            return

        try:
            if self.model is not None:
                self.model.to('cpu')
                del self.model
                self.model = None

            if self.tokenizer is not None:
                del self.tokenizer
                self.tokenizer = None

            if self.resource_manager is None:
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
            else:
                self.resource_manager.unregister_model("TextProcessor")
                self.resource_manager.clear_cache(aggressive=True)

            self.is_loaded = False
            print("βœ“ TextProcessor unloaded")

        except Exception as e:
            print(f"⚠ Error during TextProcessor unload: {str(e)}")

    def expand_prompt(self, user_input: str) -> str:
        """Expand *user_input* into a detailed motion prompt (≀50 words).

        Falls back to returning the original input when generation fails.

        Raises:
            RuntimeError: if called before load_model().
        """
        if not self.is_loaded:
            raise RuntimeError("TextProcessor not loaded. Call load_model() first.")

        system_prompt = """You are a motion description expert. Convert the user's brief instruction into a detailed, dynamic prompt for video generation.

Focus on:
- Camera movements (pan, zoom, tilt, tracking)
- Subject actions and motions
- Scene dynamics and atmosphere
- Temporal flow and transitions

Keep output under 50 words. Use vivid, cinematic language. English only."""

        try:
            chat = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_input},
            ]

            rendered = self.tokenizer.apply_chat_template(
                chat,
                tokenize=False,
                add_generation_prompt=True,
            )
            encoded = self.tokenizer([rendered], return_tensors="pt").to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    **encoded,
                    max_new_tokens=self.MAX_OUTPUT_LENGTH,
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.9,
                    repetition_penalty=1.1,
                )

            # Keep only the newly generated tail, stripping the prompt tokens.
            trimmed = [
                full_ids[len(prompt_ids):]
                for prompt_ids, full_ids in zip(encoded.input_ids, outputs)
            ]

            expanded_prompt = self.tokenizer.batch_decode(
                trimmed,
                skip_special_tokens=True,
            )[0].strip()

            # Hard cap at 50 words regardless of what the model produced.
            pieces = expanded_prompt.split()
            if len(pieces) > 50:
                expanded_prompt = " ".join(pieces[:50]) + "..."

            print(f"βœ“ Prompt expanded: '{user_input}' β†’ '{expanded_prompt}'")
            return expanded_prompt

        except Exception as e:
            print(f"βœ— Error during prompt expansion: {str(e)}")
            return user_input

    def process(self, user_input: str, auto_unload: bool = True) -> str:
        """Full pipeline: load if needed, expand, then optionally unload.

        Returns the expanded prompt, or the original input on any failure.
        """
        try:
            if not self.is_loaded:
                self.load_model()

            result = self.expand_prompt(user_input)

            if auto_unload:
                self.unload_model()

            return result

        except Exception as e:
            print(f"βœ— TextProcessor pipeline error: {str(e)}")
            return user_input
VideoEngine.py ADDED
@@ -0,0 +1,415 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile VideoEngine.py
2
+ """
3
+ DeltaFlow - Video Engine (Optimized)
4
+ High-speed Image-to-Video generation using Wan2.2-I2V-A14B
5
+ Features: Lightning LoRA + INT8/FP8 Quantization + AoT Compilation
6
+ Environment-adaptive optimization for Colab Testing & HF Spaces Deployment
7
+ """
8
+
9
+ import warnings
10
+ warnings.filterwarnings('ignore', category=FutureWarning)
11
+ warnings.filterwarnings('ignore', category=DeprecationWarning)
12
+
13
+ import gc
14
+ import os
15
+ import tempfile
16
+ import traceback
17
+ from typing import Optional
18
+
19
+ import torch
20
+ import numpy as np
21
+ from PIL import Image
22
+
23
+ # Critical dependencies
24
+ import ftfy
25
+ import sentencepiece
26
+
27
+ # Diffusers imports
28
+ from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
29
+ from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
30
+ from diffusers.utils.export_utils import export_to_video
31
+
32
+ # Optional dependency for AOTI (HF Spaces deployment only)
33
+ try:
34
+ from spaces.zero.torch.aoti import aoti_blocks_load
35
+ HAS_AOTI = True
36
+ except ImportError:
37
+ HAS_AOTI = False
38
+
39
+
40
class VideoEngine:
    """
    Ultra-fast video generation engine using Wan2.2-I2V with Lightning LoRA.
    Optimized for 4-8 step inference with INT8/FP8 quantization.
    """

    MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
    TRANSFORMER_REPO = "cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers"
    LORA_REPO = "Kijai/WanVideo_comfy"
    LORA_WEIGHT = "Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors"

    # Model geometry / frame constraints of the Wan2.2 pipeline
    MAX_DIM = 832
    MIN_DIM = 480
    SQUARE_DIM = 640
    MULTIPLE_OF = 16  # spatial dims must be divisible by 16
    FIXED_FPS = 16
    MIN_FRAMES = 8
    MAX_FRAMES = 80

    DEFAULT_PARAMS = {
        "num_inference_steps": 4,
        "guidance_scale": 1.0,
        "guidance_scale_2": 1.0,
    }

    def __init__(self):
        """Initialize VideoEngine with environment-adaptive configuration."""
        # Environment detection: SPACE_ID is set by the HF Spaces runtime.
        self.is_spaces = os.environ.get('SPACE_ID') is not None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Model state
        self.pipeline: Optional[WanImageToVideoPipeline] = None
        self.is_loaded = False

        # Log environment
        env_name = "HuggingFace Spaces" if self.is_spaces else "Colab/Local Testing"
        print(f"🌍 Environment detected: {env_name}")

    def _check_torchao_available(self) -> bool:
        """Check if torchao is available for quantization."""
        try:
            import torchao
            return True
        except ImportError:
            return False

    def _check_aoti_available(self) -> bool:
        """Check if aoti is available for AoT compilation."""
        try:
            import aoti
            return True
        except ImportError:
            return False

    def _check_xformers_available(self) -> bool:
        """Check if xformers is available for memory efficient attention."""
        try:
            import xformers
            return True
        except ImportError:
            return False

    def _apply_quantization(self):
        """Apply INT8/FP8 quantization based on GPU capability."""
        if not self._check_torchao_available():
            print("⚠ [3/7] Skipping quantization (torchao not available)")
            return

        from torchao.quantization import quantize_, Float8DynamicActivationFloat8WeightConfig, Int8WeightOnlyConfig

        print("β†’ [3/7] Applying quantization...")

        # FP8 needs compute capability >= 8.9 (Ada/Hopper generation).
        fp8_supported = False
        if torch.cuda.is_available():
            cuda_capability = torch.cuda.get_device_capability()
            fp8_supported = cuda_capability[0] > 8 or (cuda_capability[0] == 8 and cuda_capability[1] >= 9)
            print(f" β€’ GPU: {torch.cuda.get_device_name(0)}")
            print(f" β€’ Compute Capability: {cuda_capability[0]}.{cuda_capability[1]}")
            print(f" β€’ FP8 Support: {'Yes' if fp8_supported else 'No (requires >=8.9)'}")

        # Text encoder: INT8 (always supported)
        quantize_(self.pipeline.text_encoder, Int8WeightOnlyConfig())

        if fp8_supported:
            # Use FP8 for transformers (faster)
            quantize_(self.pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
            quantize_(self.pipeline.transformer_2, Float8DynamicActivationFloat8WeightConfig())
            print("βœ“ FP8 quantization applied (transformers)")
        else:
            # Fallback to INT8 for transformers (still faster than BF16)
            quantize_(self.pipeline.transformer, Int8WeightOnlyConfig())
            quantize_(self.pipeline.transformer_2, Int8WeightOnlyConfig())
            print("βœ“ INT8 quantization applied (transformers)")

        print("βœ“ Quantization complete")

    def _enable_tf32(self):
        """Enable TF32 acceleration for A100/H100 GPUs."""
        if self.device != "cuda":
            return

        print("β†’ [4/7] Enabling TF32 acceleration...")
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
        print("βœ“ TF32 enabled (20-30% speedup on A100/H100)")

    def _enable_xformers(self):
        """Enable xFormers memory efficient attention."""
        print("β†’ [5/7] Enabling xFormers attention...")

        if not self._check_xformers_available():
            print("⚠ xFormers not available, using standard attention")
            return

        try:
            self.pipeline.enable_xformers_memory_efficient_attention()
            print("βœ“ xFormers enabled (25-30% speedup)")
        except Exception as e:
            print(f"⚠ xFormers failed: {e}")
            print(" Continuing with standard attention")

    def _apply_aot_compilation(self):
        """Apply AOTI pre-compiled blocks (HF Spaces deployment only)."""
        if not self.is_spaces:
            print("β†’ [6/7] Skipping AOTI (testing mode, not needed)")
            return

        if not HAS_AOTI:
            print("⚠ [6/7] Skipping AOTI (spaces.zero.torch.aoti not available)")
            return

        print("β†’ [6/7] Loading AOTI pre-compiled blocks...")
        try:
            # Pick the variant matching the quantization that was applied.
            variant = 'int8'  # Default
            if torch.cuda.is_available():
                cuda_cap = torch.cuda.get_device_capability()
                fp8_supported = cuda_cap[0] > 8 or (cuda_cap[0] == 8 and cuda_cap[1] >= 9)
                if fp8_supported:
                    variant = 'fp8da'

            # Load pre-compiled blocks from zerogpu-aoti/Wan2
            aoti_blocks_load(self.pipeline.transformer, 'zerogpu-aoti/Wan2', variant=variant)
            aoti_blocks_load(self.pipeline.transformer_2, 'zerogpu-aoti/Wan2', variant=variant)
            print(f"βœ“ AOTI blocks loaded (variant: {variant}, 60-70% speedup)")
        except Exception as e:
            print(f"⚠ AOTI load failed (falling back to standard inference): {e}")
            print(" This is not critical, speed will be slightly slower")

    def load_model(self) -> None:
        """Load Wan2.2 pipeline with Lightning LoRA and optimizations."""
        if self.is_loaded:
            print("⚠ VideoEngine already loaded, skipping...")
            return

        try:
            # Aggressive memory cleanup before loading the ~14B pipeline.
            print("β†’ Clearing GPU memory before loading model...")
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.ipc_collect()

            print("=" * 60)
            print("Loading Wan2.2 I2V Engine with Optimizations")
            print("=" * 60)

            # Stage 1: Load base pipeline (bf16 transformers from a mirror repo)
            print("β†’ [1/7] Loading base pipeline...")
            self.pipeline = WanImageToVideoPipeline.from_pretrained(
                self.MODEL_ID,
                transformer=WanTransformer3DModel.from_pretrained(
                    self.TRANSFORMER_REPO,
                    subfolder='transformer',
                    torch_dtype=torch.bfloat16,
                    device_map='cuda',
                ),
                transformer_2=WanTransformer3DModel.from_pretrained(
                    self.TRANSFORMER_REPO,
                    subfolder='transformer_2',
                    torch_dtype=torch.bfloat16,
                    device_map='cuda',
                ),
                torch_dtype=torch.bfloat16,
            ).to('cuda')

            # Stage 2: Load Lightning LoRA into both transformers, fuse, then
            # drop the adapter bookkeeping so only fused weights remain.
            print("β†’ [2/7] Loading Lightning LoRA adapters...")
            self.pipeline.load_lora_weights(self.LORA_REPO, weight_name=self.LORA_WEIGHT, adapter_name="lightx2v")
            kwargs_lora = {"load_into_transformer_2": True}
            self.pipeline.load_lora_weights(self.LORA_REPO, weight_name=self.LORA_WEIGHT, adapter_name="lightx2v_2", **kwargs_lora)
            self.pipeline.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
            self.pipeline.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
            self.pipeline.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
            self.pipeline.unload_lora_weights()
            print("βœ“ Lightning LoRA loaded and fused")

            # Stage 3: Apply quantization (both environments - critical for memory)
            self._apply_quantization()

            # Stage 4: Enable TF32 acceleration
            self._enable_tf32()

            # Stage 5: Enable xFormers attention
            self._enable_xformers()

            # Stage 6: Apply AOTI (deployment only)
            self._apply_aot_compilation()

            # Stage 7: Final setup
            print("β†’ [7/7] Finalizing setup...")
            self.is_loaded = True

            # Determine precision used (mirrors _apply_quantization's decision)
            precision_info = "BF16 (no quantization)"
            if self._check_torchao_available() and torch.cuda.is_available():
                cuda_cap = torch.cuda.get_device_capability()
                fp8_supported = cuda_cap[0] > 8 or (cuda_cap[0] == 8 and cuda_cap[1] >= 9)
                precision_info = "FP8 (quantized)" if fp8_supported else "INT8 (quantized)"

            # Display status
            mode_name = "Deployment (HF Spaces)" if self.is_spaces else "Testing (Colab/Local)"
            print("=" * 60)
            print(f"βœ“ VideoEngine Ready - {mode_name}")
            print(f" β€’ Device: {self.device}")
            print(f" β€’ Precision: {precision_info} (Memory Optimized)")
            print(f" β€’ Lightning LoRA: Enabled (4-step inference)")
            print(f" β€’ TF32: {'Enabled' if self.device == 'cuda' else 'N/A (CPU)'}")
            print(f" β€’ xFormers: {'Enabled' if self._check_xformers_available() else 'Disabled'}")
            if self.is_spaces:
                print(f" β€’ AOTI: {'Enabled' if HAS_AOTI else 'Disabled'}")
                print(f" β€’ Expected Speed: 15-30s per video")
            else:
                print(f" β€’ Expected Speed: 180-220s per video (A100 40GB)")
            print("=" * 60)

        except Exception as e:
            print(f"\n{'='*60}")
            print("βœ— FATAL ERROR LOADING VIDEO ENGINE")
            print(f"{'='*60}")
            print(f"Error Type: {type(e).__name__}")
            print(f"Error Message: {str(e)}")
            print(f"\nFull Traceback:")
            print(traceback.format_exc())
            print(f"{'='*60}")
            raise

    def resize_image(self, image: Image.Image) -> Image.Image:
        """Resize image to fit model constraints while preserving aspect ratio.

        Squares map to SQUARE_DIM; extreme aspect ratios are center-cropped to
        the supported range; final dims are rounded to MULTIPLE_OF and clamped
        to [MIN_DIM, MAX_DIM].
        """
        width, height = image.size

        if width == height:
            return image.resize((self.SQUARE_DIM, self.SQUARE_DIM), Image.LANCZOS)

        aspect_ratio = width / height
        MAX_ASPECT_RATIO = self.MAX_DIM / self.MIN_DIM
        MIN_ASPECT_RATIO = self.MIN_DIM / self.MAX_DIM

        image_to_resize = image

        if aspect_ratio > MAX_ASPECT_RATIO:
            # Too wide: center-crop width to the widest supported ratio.
            target_w, target_h = self.MAX_DIM, self.MIN_DIM
            crop_width = int(round(height * MAX_ASPECT_RATIO))
            left = (width - crop_width) // 2
            image_to_resize = image.crop((left, 0, left + crop_width, height))
        elif aspect_ratio < MIN_ASPECT_RATIO:
            # Too tall: center-crop height to the tallest supported ratio.
            target_w, target_h = self.MIN_DIM, self.MAX_DIM
            crop_height = int(round(width / MIN_ASPECT_RATIO))
            top = (height - crop_height) // 2
            image_to_resize = image.crop((0, top, width, top + crop_height))
        else:
            if width > height:
                target_w = self.MAX_DIM
                target_h = int(round(target_w / aspect_ratio))
            else:
                target_h = self.MAX_DIM
                target_w = int(round(target_h * aspect_ratio))

        final_w = round(target_w / self.MULTIPLE_OF) * self.MULTIPLE_OF
        final_h = round(target_h / self.MULTIPLE_OF) * self.MULTIPLE_OF
        final_w = max(self.MIN_DIM, min(self.MAX_DIM, final_w))
        final_h = max(self.MIN_DIM, min(self.MAX_DIM, final_h))

        return image_to_resize.resize((final_w, final_h), Image.LANCZOS)

    def get_num_frames(self, duration_seconds: float) -> int:
        """Calculate frame count from duration (clamped, +1 for the start frame)."""
        return 1 + int(np.clip(
            int(round(duration_seconds * self.FIXED_FPS)),
            self.MIN_FRAMES,
            self.MAX_FRAMES,
        ))

    def generate_video(
        self,
        image: Image.Image,
        prompt: str,
        duration_seconds: float = 3.0,
        num_inference_steps: int = 4,
        guidance_scale: float = 1.0,
        guidance_scale_2: float = 1.0,
        seed: int = 42,
    ) -> str:
        """Generate video from image with optimized pipeline.

        Returns:
            str: path to the exported .mp4 file (named by seed; note a repeated
            seed overwrites the previous file).

        Raises:
            RuntimeError: if the engine is not loaded or GPU memory runs out.
        """
        if not self.is_loaded:
            raise RuntimeError("VideoEngine not loaded. Call load_model() first.")

        try:
            resized_image = self.resize_image(image)
            num_frames = self.get_num_frames(duration_seconds)

            print(f"\n→ Generating video:")
            print(f" β€’ Prompt: {prompt}")
            print(f" β€’ Resolution: {resized_image.width}x{resized_image.height}")
            print(f" β€’ Frames: {num_frames} ({duration_seconds}s @ {self.FIXED_FPS}fps)")
            print(f" β€’ Steps: {num_inference_steps}")

            with torch.no_grad():
                output_frames = self.pipeline(
                    image=resized_image,
                    prompt=prompt,
                    height=resized_image.height,
                    width=resized_image.width,
                    num_frames=num_frames,
                    guidance_scale=float(guidance_scale),
                    guidance_scale_2=float(guidance_scale_2),
                    num_inference_steps=int(num_inference_steps),
                    # BUGFIX: seed the generator on the engine's actual device
                    # instead of hard-coded "cuda" (crashed on CPU fallback).
                    generator=torch.Generator(device=self.device).manual_seed(seed),
                ).frames[0]

            # Export video (default encoder settings; no preset is passed here)
            temp_dir = tempfile.gettempdir()
            output_path = os.path.join(temp_dir, f"deltaflow_{seed}.mp4")
            export_to_video(output_frames, output_path, fps=self.FIXED_FPS)

            print(f"βœ“ Video generated: {output_path}")
            return output_path

        except torch.cuda.OutOfMemoryError as e:
            print(f"\n{'='*60}")
            print("βœ— GPU OUT OF MEMORY ERROR")
            print(f"{'='*60}")
            print(traceback.format_exc())
            print(f"{'='*60}")
            # Chain the OOM so the root cause survives the re-raise.
            raise RuntimeError("Insufficient GPU memory. Try reducing duration or restarting.") from e
        except Exception as e:
            print(f"\n{'='*60}")
            print("βœ— FATAL ERROR DURING VIDEO GENERATION")
            print(f"{'='*60}")
            print(f"Error Type: {type(e).__name__}")
            print(f"Error Message: {str(e)}")
            print(f"\nFull Traceback:")
            print(traceback.format_exc())
            print(f"{'='*60}")
            raise

    def unload_model(self) -> None:
        """Unload pipeline and free memory."""
        if not self.is_loaded:
            return

        try:
            if self.pipeline is not None:
                del self.pipeline
                self.pipeline = None

            torch.cuda.empty_cache()
            self.is_loaded = False
            print("βœ“ VideoEngine unloaded")
        except Exception as e:
            print(f"⚠ Unload warning: {str(e)}")
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys

sys.stdout.flush()
import functools
# Force flush on every call so progress logs appear immediately in the
# HF Spaces / Colab console. NOTE: this shadows the builtin `print` for
# this module only.
print = functools.partial(print, flush=True)

# Imported eagerly so missing tokenizer dependencies surface at startup
# (both are listed as required packages in check_environment below).
import ftfy
import sentencepiece

from FlowFacade import FlowFacade
from ui_manager import UIManager
13
+
14
+
15
def preload_models():
    """
    Warm the Hugging Face cache with every model this app needs.

    Fallback for when the Space's YAML ``preload_from_hub`` did not run.
    Does nothing outside the HF Spaces environment (no SPACE_ID env var)
    or when a matching cache entry already exists. Download failures are
    non-fatal: models are then fetched lazily on the first generation.
    """
    if not os.environ.get('SPACE_ID'):
        return

    hub_cache = os.path.expanduser("~/.cache/huggingface/hub")
    if os.path.exists(hub_cache):
        cache_hit = any(
            "wan2.2" in entry.lower() or "models--kijai" in entry.lower()
            for entry in os.listdir(hub_cache)
        )
        if cache_hit:
            print("βœ“ Models already cached (YAML preload worked)")
            return

    print("β†’ Pre-caching models to disk (first-time setup)...")
    print("  This may take 2-3 minutes, please wait...")

    try:
        from diffusers import WanTransformer3DModel
        from transformers import AutoModelForCausalLM, AutoTokenizer
        from huggingface_hub import hf_hub_download
        import torch

        # Both denoiser stages of the Wan2.2 I2V model share one repo.
        print("  [1/4] Downloading video model transformer...")
        WanTransformer3DModel.from_pretrained(
            "kijai/wan2.2-i2v-a14b-diffusers",
            subfolder='transformer',
            torch_dtype=torch.bfloat16,
        )

        print("  [2/4] Downloading video model transformer_2...")
        WanTransformer3DModel.from_pretrained(
            "kijai/wan2.2-i2v-a14b-diffusers",
            subfolder='transformer_2',
            torch_dtype=torch.bfloat16,
        )

        # NOTE(review): this fetches an LTX-Video checkpoint although the
        # log line says "Lightning LoRA" -- confirm the intended repo/file.
        print("  [3/4] Downloading Lightning LoRA...")
        hf_hub_download(
            "Lightricks/LTX-Video",
            "ltx-video-2b-v0.9.safetensors"
        )

        # Small LLM used only for optional prompt expansion.
        print("  [4/4] Downloading text model (optional)...")
        AutoModelForCausalLM.from_pretrained(
            "Qwen/Qwen2.5-0.5B-Instruct",
            torch_dtype=torch.bfloat16,
        )
        AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

        print("βœ“ All models cached successfully!")
        print("  Future users will load instantly from cache")

    except Exception as e:
        print(f"⚠ Pre-cache warning: {e}")
        print("  Models will download on first generation instead")
73
+
74
+
75
def check_environment():
    """Verify that all hard dependencies are importable.

    Prints install instructions and exits the process (status 1) when a
    required package is missing. Missing optional packages are only
    reported when the DEBUG environment variable is set.
    """
    required = (
        "torch", "transformers", "diffusers", "gradio", "PIL",
        "accelerate", "numpy", "ftfy", "sentencepiece",
    )
    optional = {
        "torchao": "INT8/FP8 quantization",
        "xformers": "Memory efficient attention",
        "aoti": "AoT compilation",
    }

    def _importable(name):
        # Import by name; any ImportError means the package is absent.
        try:
            __import__(name)
            return True
        except ImportError:
            return False

    missing_packages = [name for name in required if not _importable(name)]
    missing_optional = [
        f"{name} ({why})" for name, why in optional.items()
        if not _importable(name)
    ]

    if missing_packages:
        print("\n❌ Missing required packages:", ", ".join(missing_packages))
        print("\nInstall commands:")
        print("!pip install torch==2.9.0 torchvision==0.24.0 torchaudio==2.9.0 --index-url https://download.pytorch.org/whl/cu126")
        print("!pip install diffusers>=0.32.0 transformers>=4.46.0 accelerate gradio pillow numpy spaces ftfy sentencepiece protobuf imageio-ffmpeg")
        print("!pip install torchao xformers")
        sys.exit(1)

    # Only show missing optional in debug mode
    if missing_optional and os.environ.get('DEBUG'):
        print("⚠ Optional packages missing:", ", ".join(missing_optional))
113
+
114
+
115
def main():
    """Application entry point: env check, model pre-cache, UI launch."""
    check_environment()
    preload_models()

    facade = None  # sentinel so the interrupt handler can test creation
    try:
        facade = FlowFacade()
        ui = UIManager(facade)
        # Colab has no public URL, so request a Gradio share link there.
        in_colab = 'google.colab' in sys.modules

        print("βœ“ Ready")
        ui.launch(
            share=in_colab,
            server_name="0.0.0.0",
            server_port=None,
            show_error=True,
        )

    except KeyboardInterrupt:
        print("\n⚠ Shutdown requested")
        # NOTE(review): assumes FlowFacade exposes cleanup() -- not
        # visible in this file; confirm against FlowFacade.
        if facade is not None:
            facade.cleanup()
        sys.exit(0)

    except Exception as e:
        print(f"\n❌ Startup error: {str(e)}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
css_style.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Stylesheet injected into the Gradio app via gr.Blocks(css=...) in
# ui_manager.py. Pure presentation data -- no logic lives here. Class
# names (.input-card, .patience-banner, ...) must match the elem_classes
# used when building the UI.
DELTAFLOW_CSS = """
/* Global Light Theme */
:root {
    --primary-bg: #f8f9fa;
    --secondary-bg: #ffffff;
    --card-bg: #ffffff;
    --border-color: #e0e0e0;
    --text-primary: #2c3e50;
    --text-secondary: #6c757d;
    --accent-color: #6366f1;
    --accent-hover: #4f46e5;
    --success-color: #10b981;
    --error-color: #ef4444;
    --shadow-sm: 0 2px 8px rgba(0, 0, 0, 0.08);
    --shadow-md: 0 4px 16px rgba(0, 0, 0, 0.12);
    --shadow-lg: 0 8px 32px rgba(0, 0, 0, 0.16);
}

/* Main Container */
.gradio-container {
    background: var(--primary-bg) !important;
    font-family: 'Segoe UI', 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
}

/* Header Styling */
.header-container {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    padding: 2.5rem 2rem;
    border-radius: 16px;
    margin-bottom: 2rem;
    box-shadow: var(--shadow-lg);
}

.header-title {
    font-size: 2.8rem;
    font-weight: 700;
    color: #ffffff;
    margin: 0;
    letter-spacing: -0.02em;
    text-shadow: 0 2px 10px rgba(0, 0, 0, 0.3);
}

.header-subtitle {
    font-size: 1.1rem;
    color: rgba(255, 255, 255, 0.95);
    margin-top: 0.5rem;
    font-weight: 400;
}

/* Card Styling */
.input-card, .output-card {
    background: var(--card-bg) !important;
    border: 1px solid var(--border-color) !important;
    border-radius: 12px !important;
    padding: 1.5rem !important;
    box-shadow: var(--shadow-md) !important;
}

/* Label Styling */
label {
    color: var(--text-primary) !important;
    font-weight: 600 !important;
    font-size: 0.95rem !important;
    margin-bottom: 0.5rem !important;
}

/* Input Fields */
textarea, input[type="text"], input[type="number"] {
    background: var(--secondary-bg) !important;
    border: 1.5px solid var(--border-color) !important;
    color: var(--text-primary) !important;
    border-radius: 8px !important;
    padding: 0.75rem !important;
    font-size: 0.95rem !important;
    transition: all 0.2s ease !important;
}

textarea:focus, input[type="text"]:focus, input[type="number"]:focus {
    border-color: var(--accent-color) !important;
    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.15) !important;
    outline: none !important;
}

/* Button Styling */
.primary-button {
    background: linear-gradient(135deg, var(--accent-color) 0%, var(--accent-hover) 100%) !important;
    border: none !important;
    color: white !important;
    padding: 0.875rem 2rem !important;
    font-size: 1rem !important;
    font-weight: 600 !important;
    border-radius: 10px !important;
    cursor: pointer !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 4px 12px rgba(99, 102, 241, 0.3) !important;
}

.primary-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(99, 102, 241, 0.4) !important;
}

/* Checkbox & Switch */
input[type="checkbox"] {
    accent-color: var(--accent-color) !important;
}

/* Progress Bar */
.progress-bar {
    background: #f0f0f0 !important;
    border-radius: 8px !important;
    overflow: hidden !important;
}

.progress-bar-fill {
    background: linear-gradient(90deg, var(--accent-color), var(--success-color)) !important;
    height: 8px !important;
}

/* Video Player */
video {
    border-radius: 12px !important;
    box-shadow: var(--shadow-md) !important;
    max-width: 100% !important;
    border: 1px solid var(--border-color) !important;
}

/* Image Upload Area */
.image-upload {
    border: 2px dashed var(--border-color) !important;
    border-radius: 12px !important;
    background: #fafafa !important;
    transition: all 0.3s ease !important;
}

.image-upload:hover {
    border-color: var(--accent-color) !important;
    background: rgba(99, 102, 241, 0.03) !important;
}

/* Accordion */
.accordion {
    background: var(--secondary-bg) !important;
    border: 1px solid var(--border-color) !important;
    border-radius: 8px !important;
}

/* Tabs */
.tab-nav {
    border-bottom: 2px solid var(--border-color) !important;
}

.tab-nav button {
    color: var(--text-secondary) !important;
    border-bottom: 2px solid transparent !important;
    transition: all 0.2s ease !important;
}

.tab-nav button.selected {
    color: var(--accent-color) !important;
    border-bottom-color: var(--accent-color) !important;
    font-weight: 600 !important;
}

/* Status Messages */
.success-msg {
    color: var(--success-color) !important;
    background: rgba(16, 185, 129, 0.1) !important;
    padding: 0.75rem !important;
    border-radius: 8px !important;
    border-left: 4px solid var(--success-color) !important;
}

.error-msg {
    color: var(--error-color) !important;
    background: rgba(239, 68, 68, 0.1) !important;
    padding: 0.75rem !important;
    border-radius: 8px !important;
    border-left: 4px solid var(--error-color) !important;
}

/* Info Box */
.info-box {
    background: #f0f4ff !important;
    border: 1px solid #c7d7fe !important;
    border-radius: 8px !important;
    padding: 1rem !important;
    color: #4338ca !important;
    font-size: 0.9rem !important;
}

/* Patience Banner */
.patience-banner {
    background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%) !important;
    border: 1px solid #fbbf24 !important;
    border-radius: 8px !important;
    padding: 0.875rem !important;
    margin-bottom: 1rem !important;
    color: #92400e !important;
    font-size: 0.875rem !important;
    text-align: center !important;
    box-shadow: 0 2px 8px rgba(251, 191, 36, 0.15) !important;
}

/* Quality Tips Banner (Blue) */
.quality-banner {
    background: linear-gradient(135deg, #dbeafe 0%, #bfdbfe 100%) !important;
    border: 1px solid #60a5fa !important;
    border-radius: 8px !important;
    padding: 0.875rem !important;
    margin-bottom: 1rem !important;
    color: #1e40af !important;
    font-size: 0.875rem !important;
    text-align: left !important;
    box-shadow: 0 2px 8px rgba(96, 165, 250, 0.15) !important;
}

/* Loading Spinner */
.loading {
    display: inline-block;
    width: 20px;
    height: 20px;
    border: 3px solid rgba(99, 102, 241, 0.2);
    border-radius: 50%;
    border-top-color: var(--accent-color);
    animation: spin 1s ease-in-out infinite;
}

@keyframes spin {
    to { transform: rotate(360deg); }
}

/* Footer */
.footer {
    text-align: center;
    padding: 1.5rem;
    color: var(--text-secondary);
    font-size: 0.85rem;
    border-top: 1px solid var(--border-color);
    margin-top: 2rem;
    background: var(--secondary-bg);
    border-radius: 8px;
}

/* Example Cards */
.example-card {
    background: var(--secondary-bg) !important;
    border: 1px solid var(--border-color) !important;
    border-radius: 8px !important;
    padding: 1rem !important;
    transition: all 0.2s ease !important;
}

.example-card:hover {
    border-color: var(--accent-color) !important;
    box-shadow: var(--shadow-sm) !important;
}

/* Responsive Design */
@media (max-width: 768px) {
    .header-title {
        font-size: 2rem;
    }

    .header-subtitle {
        font-size: 0.95rem;
    }

    .input-card, .output-card {
        padding: 1rem !important;
    }
}

/* Container Max Width */
.gradio-container .contain {
    max-width: 1200px !important;
    margin: 0 auto !important;
}
"""
prompt_examples.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Curated prompt presets surfaced by the two UI dropdowns in
# ui_manager.py. Keys are the category labels shown to the user; values
# are ready-to-use motion-instruction strings passed verbatim as the
# video model's prompt. Edit strings here only -- the UI reads this dict
# directly, so no other change is needed to add or remove presets.
PROMPT_EXAMPLES = {
    "Fashion / Beauty Portrait": [
        "Hair flows elegantly, model gazes confidently at camera, studio lighting highlights facial features, high-fashion editorial",
        "Dramatic hair whip in slow motion, fierce eye contact with camera, wind machine effect, hair flies dynamically across frame",
        "Model's head tilts back with confidence, hair cascades like waterfall, powerful gaze intensifies, editorial vogue style",
        "Explosive hair toss left to right, eyes lock onto camera seductively, strobe lighting flashes, high-energy fashion film",
        "Hand gracefully sweeps through hair, fingers run through strands, sultry gaze follows movement, intimate beauty moment",
        "Model touches hair delicately, hand brushes cheek softly, eyes sparkle with emotion, romantic close-up shot",
        "Hair flips dramatically to one side, hand catches falling strands, confident smile emerges, dynamic fashion energy",
        "Slow-motion head turn reveals profile, hand tucks hair behind ear elegantly, studio lights create dramatic shadows",
        "Subtle wink emerges slowly, one eye closes playfully, lips curve into flirty smile, head tilts coyly, seductive charm",
        "Radiant smile spreads across face, eyes sparkle with joy, cheeks lift naturally, warm genuine happiness radiates",
        "Seductive gaze intensifies, eyes narrow alluringly, lips part slightly, slow blink follows, smoldering fashion intensity",
        "Playful wink with knowing smile, eyebrow raises suggestively, head turns to camera confidently, charismatic energy",
    ],

    "Portrait / Character - Subtle": [
        "Subject turns head sharply to camera, eyes widen with surprise, hair swings dramatically, emotional close-up",
        "Person laughs heartily, head tilts back, genuine joy radiates, natural lighting shifts warmly",
        "Character looks around curiously, head movements follow unseen object, eyes track motion, engaging storytelling",
        "Subject's expression transforms from neutral to big smile, eyebrows raise, head nods enthusiastically, vibrant energy",
        "Gentle nod and smile, eyes close briefly then open, warm expression emerges, peaceful moment",
        "Person takes deep breath, slight head tilt, eyes sparkle with realization, cinematic character moment",
        "Soft smile appears gradually, eyes crinkle with warmth, gentle head tilt follows, peaceful happiness",
        "Quick wink and friendly smile, eyebrows lift playfully, natural cheerful expression, approachable energy",
    ],

    "Portrait / Character - Dynamic": [
        "Hand waves enthusiastically in front of camera, fingers spread wide, big smile accompanies gesture, friendly greeting",
        "Subject raises hand to forehead dramatically, gasps in realization, eyes widen, theatrical reaction shot",
        "Hand brushes hair back confidently, head tilts to side, playful wink follows, charismatic personality shine",
        "Person covers mouth while laughing, shoulders shake, hand gestures expressively, genuine candid moment",
        "Subject points at camera playfully, leans forward, grin widens, interactive engaging energy",
        "Hand touches chin thoughtfully, eyes look upward pondering, subtle head tilt, contemplative character study",
        "Dramatic hand through hair motion, head turns following movement, intense gaze locks camera, powerful presence",
        "Person adjusts glasses with one hand, smirks confidently, eyebrow raises, smart intellectual vibe",
    ],

    "Animals - Lively": [
        "Dog's head tilts adorably, ears perk up alert, tail wags enthusiastically, playful curious energy",
        "Cat stretches luxuriously, yawns showing teeth, blinks slowly then gazes directly at camera, feline grace",
        "Bird fluffs feathers, hops energetically, head bobs rhythmically, chirping motion implied, vibrant life",
        "Puppy's tongue lolls out happily, panting motion, ears flap gently, joyful innocent expression",
        "Horse tosses mane dramatically, nostrils flare, ears swivel attentively, majestic powerful presence",
        "Rabbit's nose twitches rapidly, ears rotate independently, whiskers quiver, alert cautious awareness",
        "Kitten paws at camera playfully, body wiggles preparing to pounce, eyes dilate with excitement, pure mischief",
        "Squirrel's cheeks puff while chewing, tiny paws hold food, tail flicks nervously, adorable wild moment",
    ],

    "Landscape / Nature": [
        "Camera swoops down from sky to ground, clouds race overhead, wind rushes through trees violently, epic establishing shot",
        "Waves crash powerfully against rocks, water explodes upward in slow motion, dramatic sunset colors intensify",
        "Time-lapse effect: clouds rush across sky rapidly, shadows race across landscape, day transforms to golden hour",
        "Camera spins 360 degrees, panoramic mountain view rotates, eagle soars past lens, breathtaking nature documentary",
        "Waterfall flows forcefully, mist rises dynamically, rainbow appears and intensifies, camera tilts up majestically",
        "Lightning strikes in distance, storm clouds swirl ominously, rain begins to fall, dramatic weather transformation",
        "Autumn leaves swirl in wind vortex, colors blur beautifully, gentle spiral motion, seasonal poetry",
        "Ocean tide rushes in, foam spreads across sand, seagulls take flight, peaceful coastal rhythm",
    ],

    "Animation / Cartoon": [
        "Character jumps high with exaggerated stretch, lands with bouncy squash, eyes pop out comically, cartoony physics",
        "Magical transformation sequence, sparkles explode everywhere, character spins rapidly, colors shift vibrantly, anime style",
        "Character does double-take, eyes bulge hugely, jaw drops to floor, classic cartoon reaction shot",
        "Superpower activation: energy aura explodes outward, hair stands up dramatically, eyes glow intensely, epic anime moment",
        "Character runs in place before zooming off-screen, dust cloud left behind, speed lines streak across frame",
        "Dance animation: character moves rhythmically, strikes multiple poses, background pulses to beat, music video style",
        "Chibi transformation: character shrinks adorably, sparkles surround, big eyes blink, kawaii energy overload",
        "Fighting pose sequence: character winds up punch, muscles flex, impact lines radiate, shonen battle energy",
    ],

    "Product / Object": [
        "Product explodes into component parts, pieces float and rotate individually, reassembles dramatically, technical showcase",
        "360-degree rotation accelerates into fast spin, dramatic lighting sweeps across surface, particle effects add premium feel",
        "Camera dive-bombs toward product, extreme close-up reveals texture details, pulls back to reveal full item dramatically",
        "Product levitates and rotates, holographic interface elements appear around it, futuristic tech reveal",
        "Liquid splashes around product in slow motion, droplets freeze mid-air, dramatic color-changing backlight",
        "Product unfolds or opens mechanically, internal mechanisms visible, precision engineering showcased, satisfying transformation",
        "Spotlight beam sweeps across product, reflections dance elegantly, luxury presentation unfolds, premium commercial",
        "Product materializes from particles, glowing assembly process, high-tech materialization effect, sci-fi showcase",
    ],

    "Abstract / Artistic": [
        "Explosion of colors radiates from center, patterns fractal outward infinitely, hypnotic kaleidoscope effect intensifies",
        "Liquid paint flows and swirls violently, colors blend and separate, organic fluid simulation, mesmerizing motion",
        "Geometric shapes shatter and reform, pieces scatter then snap back together, glitch art aesthetic",
        "Camera plunges through tunnel of shifting patterns, colors strobe rapidly, psychedelic journey effect",
        "Ink drops in water, tendrils expand and dance, colors bleed beautifully, organic abstract motion",
        "Particles swarm and form shapes, disperse into chaos, reform into new patterns, digital art visualization",
        "Mandala patterns rotate and morph, symmetry breaks then restores, spiritual geometric meditation",
        "Light rays pierce through fog, volumetric beams sweep dramatically, ethereal atmosphere intensifies, divine ambiance",
    ],
}
requirements.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core ML Frameworks
2
+ torch>=2.9.0,<3.0.0
3
+ torchvision>=0.24.0,<1.0.0
4
+ torchaudio>=2.9.0,<3.0.0
5
+
6
+ # Diffusion & Transformers
7
+ diffusers>=0.32.0
8
+ transformers>=4.46.0
9
+ accelerate>=1.2.0
10
+
11
+ # Quantization & Optimization
12
+ torchao>=0.7.0
13
+ xformers>=0.0.28
14
+
15
+ # UI Framework
16
+ gradio>=5.0.0
17
+
18
+ # Image/Video Processing
19
+ pillow>=11.0.0
20
+ imageio-ffmpeg>=0.5.0
21
+
22
+ # NLP Utilities
23
+ ftfy>=6.3.0
24
+ sentencepiece>=0.2.0
25
+ protobuf>=5.29.0
26
+
27
+ # Hugging Face
28
+ spaces>=0.30.0
29
+
30
+ # Utilities
31
+ numpy>=1.24.0,<2.0.0
ui_manager.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image
3
+ from typing import Tuple
4
+ from FlowFacade import FlowFacade
5
+ from css_style import DELTAFLOW_CSS
6
+ from prompt_examples import PROMPT_EXAMPLES
7
+
8
+
9
+ class UIManager:
10
+ def __init__(self, facade: FlowFacade):
11
+ self.facade = facade
12
+
13
+ def create_interface(self) -> gr.Blocks:
14
+ with gr.Blocks(
15
+ theme=gr.themes.Soft(),
16
+ css=DELTAFLOW_CSS,
17
+ title="DeltaFlow - Fast AI Image to Video"
18
+ ) as interface:
19
+
20
+ # Header
21
+ gr.HTML("""
22
+ <div class="header-container">
23
+ <h1 class="header-title">⚑ DeltaFlow</h1>
24
+ <p class="header-subtitle">
25
+ Bring Your Images to Life with AI Magic ✨<br>
26
+ Transform any still image into dynamic, cinematic videos
27
+ </p>
28
+ </div>
29
+ """)
30
+
31
+ with gr.Row():
32
+ # Left Panel: Input
33
+ with gr.Column(scale=1, elem_classes="input-card"):
34
+ gr.Markdown("### πŸ“€ Input")
35
+
36
+ image_input = gr.Image(
37
+ label="Upload Image (any type: photo, art, cartoon, etc.)",
38
+ type="pil",
39
+ elem_classes="image-upload",
40
+ height=320
41
+ )
42
+
43
+ prompt_input = gr.Textbox(
44
+ label="Motion Instruction",
45
+ placeholder="Describe camera movements (zoom, pan, orbit) and subject actions (head turn, hair flow, expression change). Be specific and cinematic! Example: 'Camera slowly zooms in, subject's eyes sparkle, hair flows gently in wind'",
46
+ lines=3,
47
+ max_lines=6
48
+ )
49
+
50
+ # Quick preset selector
51
+ category_dropdown = gr.Dropdown(
52
+ choices=list(PROMPT_EXAMPLES.keys()),
53
+ label="πŸ’‘ Quick Prompt Category",
54
+ value="Fashion / Beauty Portrait",
55
+ interactive=True
56
+ )
57
+
58
+ example_dropdown = gr.Dropdown(
59
+ choices=PROMPT_EXAMPLES["Fashion / Beauty Portrait"],
60
+ label="Example Prompts (click to use)",
61
+ value=None, # Start with no selection to ensure first click works
62
+ interactive=True
63
+ )
64
+
65
+ # Quality tips banner (blue)
66
+ gr.HTML("""
67
+ <div class="quality-banner">
68
+ <strong>πŸ’‘ Quality Tips for Best Results:</strong><br>
69
+ β€’ <strong>Describe what's IN the image:</strong> For Example: If hands aren't visible, don't mention hand movements<br>
70
+ β€’ <strong>Use example prompts:</strong> They're tested and optimized for this model<br>
71
+ β€’ <strong>Keep motions simple:</strong> Focus on head turns, expressions, camera movements
72
+ </div>
73
+ """)
74
+
75
+ # Generate button with patience banner
76
+ gr.HTML("""
77
+ <div class="patience-banner">
78
+ <strong>⏱️ Models are Initializing!</strong><br>
79
+ This first-time generation may take a moment while high-fidelity assets load into memory.<br>
80
+ Grab a coffee β˜•, and watch the magic happen! Subsequent runs will be significantly faster.
81
+ </div>
82
+ """)
83
+
84
+ generate_btn = gr.Button(
85
+ "🎬 Generate Video",
86
+ variant="primary",
87
+ elem_classes="primary-button",
88
+ size="lg"
89
+ )
90
+
91
+ # Advanced settings
92
+ with gr.Accordion("βš™οΈ Advanced Settings", open=False):
93
+ duration_slider = gr.Slider(
94
+ minimum=0.5,
95
+ maximum=5.0,
96
+ step=0.5,
97
+ value=3.0,
98
+ label="Duration (seconds)",
99
+ info="3.0s = 49 frames, 5.0s = 81 frames (16fps)"
100
+ )
101
+
102
+ steps_slider = gr.Slider(
103
+ minimum=4,
104
+ maximum=12,
105
+ step=1,
106
+ value=4,
107
+ label="Inference Steps",
108
+ info="4-6 recommended β€’ Higher steps = longer generation time"
109
+ )
110
+
111
+ with gr.Row():
112
+ guidance_scale = gr.Slider(
113
+ minimum=0.0,
114
+ maximum=5.0,
115
+ step=0.5,
116
+ value=1.0,
117
+ label="Guidance Scale (high noise)"
118
+ )
119
+
120
+ guidance_scale_2 = gr.Slider(
121
+ minimum=0.0,
122
+ maximum=5.0,
123
+ step=0.5,
124
+ value=1.0,
125
+ label="Guidance Scale (low noise)"
126
+ )
127
+
128
+ with gr.Row():
129
+ seed_input = gr.Number(
130
+ label="Seed",
131
+ value=42,
132
+ precision=0,
133
+ minimum=0,
134
+ maximum=2147483647
135
+ )
136
+
137
+ randomize_seed = gr.Checkbox(
138
+ label="Randomize Seed",
139
+ value=True
140
+ )
141
+
142
+ enable_ai_prompt = gr.Checkbox(
143
+ label="πŸ€– Enable AI Prompt Expansion (Qwen2.5)",
144
+ value=False,
145
+ info="Use AI to enhance your prompt (adds ~30s)"
146
+ )
147
+
148
+ # Right Panel: Output
149
+ with gr.Column(scale=1, elem_classes="output-card"):
150
+ gr.Markdown("### πŸŽ₯ Output")
151
+
152
+ video_output = gr.Video(
153
+ label="Generated Video",
154
+ height=400,
155
+ autoplay=True
156
+ )
157
+
158
+ with gr.Row():
159
+ prompt_output = gr.Textbox(
160
+ label="Final Prompt Used",
161
+ lines=3,
162
+ interactive=False,
163
+ show_copy_button=True,
164
+ scale=3
165
+ )
166
+
167
+ seed_output = gr.Number(
168
+ label="Seed Used",
169
+ precision=0,
170
+ interactive=False,
171
+ scale=1
172
+ )
173
+
174
+ # Info section
175
+ with gr.Row():
176
+ gr.HTML("""
177
+ <div class="info-box">
178
+ <strong>ℹ️ Tips for Best Results:</strong><br>
179
+ β€’ <strong>Use example prompts:</strong> Select a category above and click an example to get started<br>
180
+ β€’ <strong>Works with ANY image:</strong> Fashion portraits, anime, landscapes, products, abstract art, etc.<br>
181
+ β€’ <strong>For dramatic effects:</strong> Choose prompts with words like "explosive", "dramatic", "swirls", "transforms"<br>
182
+ β€’ <strong>Image quality matters:</strong> Higher resolution and clear subjects produce better results
183
+ </div>
184
+ """)
185
+
186
+ # Footer
187
+ gr.HTML("""
188
+ <div class="footer">
189
+ <p style="font-size: 0.9rem;">
190
+ <strong>Powered by:</strong>
191
+ Wan2.2-I2V-A14B Β· Qwen2.5-0.5B Β· Lightning LoRA
192
+ </p>
193
+ </div>
194
+ """)
195
+
196
+ def update_examples(category):
197
+ return gr.Dropdown(choices=PROMPT_EXAMPLES[category], value=None)
198
+
199
+ def fill_prompt(selected_example):
200
+ return selected_example if selected_example else ""
201
+
202
+ category_dropdown.change(fn=update_examples, inputs=[category_dropdown],
203
+ outputs=[example_dropdown])
204
+ example_dropdown.change(fn=fill_prompt, inputs=[example_dropdown],
205
+ outputs=[prompt_input])
206
+
207
+ generate_btn.click(
208
+ fn=self._handle_generation,
209
+ inputs=[
210
+ image_input,
211
+ prompt_input,
212
+ duration_slider,
213
+ steps_slider,
214
+ guidance_scale,
215
+ guidance_scale_2,
216
+ seed_input,
217
+ randomize_seed,
218
+ enable_ai_prompt
219
+ ],
220
+ outputs=[video_output, prompt_output, seed_output],
221
+ show_progress=True
222
+ )
223
+
224
+ return interface
225
+
226
+ def _handle_generation(self, image: Image.Image, prompt: str, duration: float,
227
+ steps: int, guidance_1: float, guidance_2: float, seed: int,
228
+ randomize: bool, enable_ai: bool,
229
+ progress=gr.Progress()) -> Tuple[str, str, int]:
230
+ try:
231
+ if image is None:
232
+ raise gr.Error("❌ Please upload an image")
233
+ if not prompt or prompt.strip() == "":
234
+ raise gr.Error("❌ Please provide a motion instruction")
235
+ if not self.facade.validate_image(image):
236
+ raise gr.Error("❌ Image dimensions invalid (256-4096px)")
237
+
238
+ video_path, final_prompt, seed_used = self.facade.generate_video_from_image(
239
+ image=image,
240
+ user_instruction=prompt,
241
+ duration_seconds=duration,
242
+ num_inference_steps=steps,
243
+ guidance_scale=guidance_1,
244
+ guidance_scale_2=guidance_2,
245
+ seed=int(seed),
246
+ randomize_seed=randomize,
247
+ enable_prompt_expansion=enable_ai,
248
+ progress=progress
249
+ )
250
+
251
+ return video_path, final_prompt, seed_used
252
+
253
+ except gr.Error:
254
+ raise
255
+
256
+ except Exception as e:
257
+ import traceback
258
+ import os
259
+ error_msg = str(e)
260
+
261
+ if os.environ.get('DEBUG'):
262
+ print(f"\nβœ— UI Error: {type(e).__name__}")
263
+ print(traceback.format_exc())
264
+
265
+ if "CUDA out of memory" in error_msg or "OutOfMemoryError" in error_msg:
266
+ raise gr.Error("❌ GPU memory insufficient. Try reducing duration/steps or restart.")
267
+ else:
268
+ raise gr.Error(f"❌ Generation failed: {error_msg}")
269
+
270
+ def launch(self, share: bool = False, server_name: str = "0.0.0.0",
271
+ server_port: int = None, **kwargs) -> None:
272
+ interface = self.create_interface()
273
+ interface.launch(share=share, server_name=server_name,
274
+ server_port=server_port, **kwargs)