Spaces:

DawnC
/

VividFlow

Running on Zero

App Files Files Community

DawnC committed on Dec 29, 2025

Commit

8945bef

verified ·

1 Parent(s): 7b2098a

Upload 4 files

Browse files

Files changed (4) hide show

FlowFacade.py +4 -7
VideoEngine_optimized.py +12 -18
prompt_examples.py +14 -14
ui_manager.py +35 -11

FlowFacade.py CHANGED Viewed

@@ -29,7 +29,7 @@ class FlowFacade:
     def _calculate_gpu_duration(self, image: Image.Image, duration_seconds: float,
                                 num_inference_steps: int, enable_prompt_expansion: bool, **kwargs) -> int:
         BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
-        BASE_STEP_DURATION = 8  # FP8 + AOTI optimized (fast direct GPU)
         resized_image = self.video_engine.resize_image(image)
         width, height = resized_image.width, resized_image.height
@@ -39,15 +39,13 @@ class FlowFacade:
         step_duration = BASE_STEP_DURATION * factor ** 1.5
         total_duration = int(num_inference_steps) * step_duration
-        # Add overhead for first-time model loading (FP8 quantization + AOTI)
         if not self.video_engine.is_loaded:
-            total_duration += 150  # ~150s for FP8 quantization, AOTI download/loading, and LoRA fusion
         if enable_prompt_expansion:
             total_duration += 40
-        # Conservative minimum: 240 seconds (4 minutes) for first run with all optimizations
-        # Subsequent runs will be much faster (~60-80s)
         return max(int(total_duration), 240)
     @spaces.GPU(duration=_calculate_gpu_duration)
@@ -136,8 +134,7 @@ class FlowFacade:
             "quantization": quantization_type,
             "optimizations": [
                 "Lightning LoRA (4-8 steps)",
-                f"{quantization_type} Quantization",
-                "AoT Compilation (if available)"
             ]
         }

     def _calculate_gpu_duration(self, image: Image.Image, duration_seconds: float,
                                 num_inference_steps: int, enable_prompt_expansion: bool, **kwargs) -> int:
         BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
+        BASE_STEP_DURATION = 8
         resized_image = self.video_engine.resize_image(image)
         width, height = resized_image.width, resized_image.height
         step_duration = BASE_STEP_DURATION * factor ** 1.5
         total_duration = int(num_inference_steps) * step_duration
+        # Add overhead for first-time model loading
         if not self.video_engine.is_loaded:
+            total_duration += 150
         if enable_prompt_expansion:
             total_duration += 40
         return max(int(total_duration), 240)
     @spaces.GPU(duration=_calculate_gpu_duration)
             "quantization": quantization_type,
             "optimizations": [
                 "Lightning LoRA (4-8 steps)",
+                f"{quantization_type} Quantization"
             ]
         }

VideoEngine_optimized.py CHANGED Viewed

@@ -1,8 +1,8 @@
 """
-DeltaFlow - Video Engine (FP8 + AOTI Optimized)
 Ultra-fast Image-to-Video generation using Wan2.2-I2V-A14B
-Features: Lightning LoRA + FP8 Quantization + AOTI Compilation
-~30-40s inference (vs 150s baseline)
 """
 import warnings
@@ -31,8 +31,8 @@ from diffusers.utils.export_utils import export_to_video
 class VideoEngine:
     """
-    Ultra-fast video generation with FP8 quantization and AOTI compilation.
-    30-40s inference time (compared to 150s baseline).
     """
     MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
@@ -75,7 +75,7 @@ class VideoEngine:
         try:
             print("=" * 60)
-            print("Loading Wan2.2 I2V Engine with FP8 + AOTI")
             print("=" * 60)
             # Stage 1: Load base pipeline to CPU
@@ -131,7 +131,7 @@ class VideoEngine:
                     int8_weight_only
                 )
-                # Quantize text encoder (INT8) - using version 2 API
                 quantize_(self.pipeline.text_encoder, int8_weight_only())
                 # Quantize transformers (FP8)
@@ -149,12 +149,10 @@ class VideoEngine:
                 print(f"⚠ Quantization failed: {e}")
                 raise RuntimeError("FP8 quantization required for this optimized version")
-            # Stage 4: AOTI blocks (currently disabled for stability)
-            print("→ [4/5] Skipping AOTI (using FP8 only for stability)...")
-            # AOTI can cause runtime errors with certain PyTorch versions
-            # FP8 quantization alone provides excellent performance
             self.use_aoti = False
-            print("✓ Using FP8 quantization only (stable and fast)")
             # Stage 5: Move to GPU and enable optimizations
             print("→ [5/5] Moving to GPU...")
@@ -188,13 +186,10 @@ class VideoEngine:
                 pass
             self.is_loaded = True
-            mode = "FP8 + AOTI" if self.use_aoti else "FP8 only"
             print("=" * 60)
-            print(f"✓ VideoEngine Ready - {mode}")
             print(f"  • Device: {self.device}")
             print(f"  • Quantization: FP8 (50% memory reduction)")
-            print(f"  • AOTI: {'Enabled (1.5-1.8x speedup)' if self.use_aoti else 'Disabled'}")
-            print(f"  • Expected inference: {'~30-40s' if self.use_aoti else '~60-70s'}")
             print("=" * 60)
         except Exception as e:
@@ -264,7 +259,7 @@ class VideoEngine:
         guidance_scale_2: float = 1.0,
         seed: int = 42,
     ) -> str:
-        """Generate video from image with FP8 + AOTI optimization."""
         if not self.is_loaded:
             raise RuntimeError("VideoEngine not loaded. Call load_model() first.")
@@ -277,7 +272,6 @@ class VideoEngine:
             print(f"  • Resolution: {resized_image.width}x{resized_image.height}")
             print(f"  • Frames: {num_frames} ({duration_seconds}s @ {self.FIXED_FPS}fps)")
             print(f"  • Steps: {num_inference_steps}")
-            print(f"  • Mode: {'FP8 + AOTI' if self.use_aoti else 'FP8 only'}")
             # Memory cleanup
             gc.collect()

 """
+DeltaFlow - Video Engine (FP8 Optimized)
 Ultra-fast Image-to-Video generation using Wan2.2-I2V-A14B
+Features: Lightning LoRA + FP8 Quantization
+~70-90s inference (vs 150s baseline)
 """
 import warnings
 class VideoEngine:
     """
+    Ultra-fast video generation with FP8 quantization.
+    70-90s inference time (compared to 150s baseline).
     """
     MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
         try:
             print("=" * 60)
+            print("Loading Wan2.2 I2V Engine with FP8 Quantization")
             print("=" * 60)
             # Stage 1: Load base pipeline to CPU
                     int8_weight_only
                 )
+                # Quantize text encoder (INT8)
                 quantize_(self.pipeline.text_encoder, int8_weight_only())
                 # Quantize transformers (FP8)
                 print(f"⚠ Quantization failed: {e}")
                 raise RuntimeError("FP8 quantization required for this optimized version")
+            # Stage 4: AOTI compilation (disabled for stability)
+            print("→ [4/5] Skipping AOTI compilation...")
             self.use_aoti = False
+            print("✓ Using FP8 quantization only")
             # Stage 5: Move to GPU and enable optimizations
             print("→ [5/5] Moving to GPU...")
                 pass
             self.is_loaded = True
             print("=" * 60)
+            print("✓ VideoEngine Ready")
             print(f"  • Device: {self.device}")
             print(f"  • Quantization: FP8 (50% memory reduction)")
             print("=" * 60)
         except Exception as e:
         guidance_scale_2: float = 1.0,
         seed: int = 42,
     ) -> str:
+        """Generate video from image with FP8 quantization."""
         if not self.is_loaded:
             raise RuntimeError("VideoEngine not loaded. Call load_model() first.")
             print(f"  • Resolution: {resized_image.width}x{resized_image.height}")
             print(f"  • Frames: {num_frames} ({duration_seconds}s @ {self.FIXED_FPS}fps)")
             print(f"  • Steps: {num_inference_steps}")
             # Memory cleanup
             gc.collect()

prompt_examples.py CHANGED Viewed

@@ -1,20 +1,16 @@
 PROMPT_EXAMPLES = {
-    "Fashion / Beauty Portrait": [
         "Hair flows elegantly, model gazes confidently at camera, studio lighting highlights facial features, high-fashion editorial",
-        "Dramatic hair whip in slow motion, fierce eye contact with camera, wind machine effect, hair flies dynamically across frame",
         "Model's head tilts back with confidence, hair cascades like waterfall, powerful gaze intensifies, editorial vogue style",
         "Explosive hair toss left to right, eyes lock onto camera seductively, strobe lighting flashes, high-energy fashion film",
-        "Hand gracefully sweeps through hair, fingers run through strands, sultry gaze follows movement, intimate beauty moment",
-        "Model touches hair delicately, hand brushes cheek softly, eyes sparkle with emotion, romantic close-up shot",
-        "Hair flips dramatically to one side, hand catches falling strands, confident smile emerges, dynamic fashion energy",
-        "Slow-motion head turn reveals profile, hand tucks hair behind ear elegantly, studio lights create dramatic shadows",
         "Subtle wink emerges slowly, one eye closes playfully, lips curve into flirty smile, head tilts coyly, seductive charm",
         "Radiant smile spreads across face, eyes sparkle with joy, cheeks lift naturally, warm genuine happiness radiates",
         "Seductive gaze intensifies, eyes narrow alluringly, lips part slightly, slow blink follows, smoldering fashion intensity",
         "Playful wink with knowing smile, eyebrow raises suggestively, head turns to camera confidently, charismatic energy",
     ],
-    "Portrait / Character - Subtle": [
         "Subject turns head sharply to camera, eyes widen with surprise, hair swings dramatically, emotional close-up",
         "Person laughs heartily, head tilts back, genuine joy radiates, natural lighting shifts warmly",
         "Character looks around curiously, head movements follow unseen object, eyes track motion, engaging storytelling",
@@ -25,10 +21,14 @@ PROMPT_EXAMPLES = {
         "Quick wink and friendly smile, eyebrows lift playfully, natural cheerful expression, approachable energy",
     ],
-    "Portrait / Character - Dynamic": [
         "Hand waves enthusiastically in front of camera, fingers spread wide, big smile accompanies gesture, friendly greeting",
         "Subject raises hand to forehead dramatically, gasps in realization, eyes widen, theatrical reaction shot",
-        "Hand brushes hair back confidently, head tilts to side, playful wink follows, charismatic personality shine",
         "Person covers mouth while laughing, shoulders shake, hand gestures expressively, genuine candid moment",
         "Subject points at camera playfully, leans forward, grin widens, interactive engaging energy",
         "Hand touches chin thoughtfully, eyes look upward pondering, subtle head tilt, contemplative character study",
@@ -36,7 +36,7 @@ PROMPT_EXAMPLES = {
         "Person adjusts glasses with one hand, smirks confidently, eyebrow raises, smart intellectual vibe",
     ],
-    "Animals - Lively": [
         "Dog's head tilts adorably, ears perk up alert, tail wags enthusiastically, playful curious energy",
         "Cat stretches luxuriously, yawns showing teeth, blinks slowly then gazes directly at camera, feline grace",
         "Bird fluffs feathers, hops energetically, head bobs rhythmically, chirping motion implied, vibrant life",
@@ -47,7 +47,7 @@ PROMPT_EXAMPLES = {
         "Squirrel's cheeks puff while chewing, tiny paws hold food, tail flicks nervously, adorable wild moment",
     ],
-    "Landscape / Nature": [
         "Camera swoops down from sky to ground, clouds race overhead, wind rushes through trees violently, epic establishing shot",
         "Waves crash powerfully against rocks, water explodes upward in slow motion, dramatic sunset colors intensify",
         "Time-lapse effect: clouds rush across sky rapidly, shadows race across landscape, day transforms to golden hour",
@@ -58,7 +58,7 @@ PROMPT_EXAMPLES = {
         "Ocean tide rushes in, foam spreads across sand, seagulls take flight, peaceful coastal rhythm",
     ],
-    "Animation / Cartoon": [
         "Character jumps high with exaggerated stretch, lands with bouncy squash, eyes pop out comically, cartoony physics",
         "Magical transformation sequence, sparkles explode everywhere, character spins rapidly, colors shift vibrantly, anime style",
         "Character does double-take, eyes bulge hugely, jaw drops to floor, classic cartoon reaction shot",
@@ -69,7 +69,7 @@ PROMPT_EXAMPLES = {
         "Fighting pose sequence: character winds up punch, muscles flex, impact lines radiate, shonen battle energy",
     ],
-    "Product / Object": [
         "Product explodes into component parts, pieces float and rotate individually, reassembles dramatically, technical showcase",
         "360-degree rotation accelerates into fast spin, dramatic lighting sweeps across surface, particle effects add premium feel",
         "Camera dive-bombs toward product, extreme close-up reveals texture details, pulls back to reveal full item dramatically",
@@ -80,7 +80,7 @@ PROMPT_EXAMPLES = {
         "Product materializes from particles, glowing assembly process, high-tech materialization effect, sci-fi showcase",
     ],
-    "Abstract / Artistic": [
         "Explosion of colors radiates from center, patterns fractal outward infinitely, hypnotic kaleidoscope effect intensifies",
         "Liquid paint flows and swirls violently, colors blend and separate, organic fluid simulation, mesmerizing motion",
         "Geometric shapes shatter and reform, pieces scatter then snap back together, glitch art aesthetic",

 PROMPT_EXAMPLES = {
+    "💃 Fashion / Beauty (Facial Only)": [
         "Hair flows elegantly, model gazes confidently at camera, studio lighting highlights facial features, high-fashion editorial",
+        "Dramatic hair whip in slow motion, fierce eye contact with camera, wind effect, hair flies dynamically across frame",
         "Model's head tilts back with confidence, hair cascades like waterfall, powerful gaze intensifies, editorial vogue style",
         "Explosive hair toss left to right, eyes lock onto camera seductively, strobe lighting flashes, high-energy fashion film",
         "Subtle wink emerges slowly, one eye closes playfully, lips curve into flirty smile, head tilts coyly, seductive charm",
         "Radiant smile spreads across face, eyes sparkle with joy, cheeks lift naturally, warm genuine happiness radiates",
         "Seductive gaze intensifies, eyes narrow alluringly, lips part slightly, slow blink follows, smoldering fashion intensity",
         "Playful wink with knowing smile, eyebrow raises suggestively, head turns to camera confidently, charismatic energy",
     ],
+    "🎭 Portrait - Subtle Expressions": [
         "Subject turns head sharply to camera, eyes widen with surprise, hair swings dramatically, emotional close-up",
         "Person laughs heartily, head tilts back, genuine joy radiates, natural lighting shifts warmly",
         "Character looks around curiously, head movements follow unseen object, eyes track motion, engaging storytelling",
         "Quick wink and friendly smile, eyebrows lift playfully, natural cheerful expression, approachable energy",
     ],
+    "🙌 Portrait - Dynamic (Hands Visible Required)": [
+        "Hair flows elegantly, hand gracefully sweeps through strands, fingers run softly, sultry gaze follows movement, beauty close-up",
+        "Model touches hair delicately, hand brushes cheek softly, eyes sparkle with emotion, romantic intimate shot",
+        "Dramatic hair flip to side, hand catches falling strands, confident smile emerges, dynamic fashion energy",
+        "Slow-motion head turn reveals profile, hand tucks hair behind ear elegantly, studio lights create dramatic shadows",
         "Hand waves enthusiastically in front of camera, fingers spread wide, big smile accompanies gesture, friendly greeting",
         "Subject raises hand to forehead dramatically, gasps in realization, eyes widen, theatrical reaction shot",
+        "Hand brushes hair back confidently, head tilts to side, playful wink follows, charismatic personality shines",
         "Person covers mouth while laughing, shoulders shake, hand gestures expressively, genuine candid moment",
         "Subject points at camera playfully, leans forward, grin widens, interactive engaging energy",
         "Hand touches chin thoughtfully, eyes look upward pondering, subtle head tilt, contemplative character study",
         "Person adjusts glasses with one hand, smirks confidently, eyebrow raises, smart intellectual vibe",
     ],
+    "🐾 Animals - Lively": [
         "Dog's head tilts adorably, ears perk up alert, tail wags enthusiastically, playful curious energy",
         "Cat stretches luxuriously, yawns showing teeth, blinks slowly then gazes directly at camera, feline grace",
         "Bird fluffs feathers, hops energetically, head bobs rhythmically, chirping motion implied, vibrant life",
         "Squirrel's cheeks puff while chewing, tiny paws hold food, tail flicks nervously, adorable wild moment",
     ],
+    "🌄 Landscape / Nature": [
         "Camera swoops down from sky to ground, clouds race overhead, wind rushes through trees violently, epic establishing shot",
         "Waves crash powerfully against rocks, water explodes upward in slow motion, dramatic sunset colors intensify",
         "Time-lapse effect: clouds rush across sky rapidly, shadows race across landscape, day transforms to golden hour",
         "Ocean tide rushes in, foam spreads across sand, seagulls take flight, peaceful coastal rhythm",
     ],
+    "✨ Animation / Cartoon": [
         "Character jumps high with exaggerated stretch, lands with bouncy squash, eyes pop out comically, cartoony physics",
         "Magical transformation sequence, sparkles explode everywhere, character spins rapidly, colors shift vibrantly, anime style",
         "Character does double-take, eyes bulge hugely, jaw drops to floor, classic cartoon reaction shot",
         "Fighting pose sequence: character winds up punch, muscles flex, impact lines radiate, shonen battle energy",
     ],
+    "📦 Product / Object": [
         "Product explodes into component parts, pieces float and rotate individually, reassembles dramatically, technical showcase",
         "360-degree rotation accelerates into fast spin, dramatic lighting sweeps across surface, particle effects add premium feel",
         "Camera dive-bombs toward product, extreme close-up reveals texture details, pulls back to reveal full item dramatically",
         "Product materializes from particles, glowing assembly process, high-tech materialization effect, sci-fi showcase",
     ],
+    "🎨 Abstract / Artistic": [
         "Explosion of colors radiates from center, patterns fractal outward infinitely, hypnotic kaleidoscope effect intensifies",
         "Liquid paint flows and swirls violently, colors blend and separate, organic fluid simulation, mesmerizing motion",
         "Geometric shapes shatter and reform, pieces scatter then snap back together, glitch art aesthetic",

ui_manager.py CHANGED Viewed

@@ -40,6 +40,12 @@ class UIManager:
                         height=320
                     )
                     prompt_input = gr.Textbox(
                         label="Motion Instruction",
                         placeholder="Describe camera movements (zoom, pan, orbit) and subject actions (head turn, hair flow, expression change). Be specific and cinematic! Example: 'Camera slowly zooms in, subject's eyes sparkle, hair flows gently in wind'",
@@ -51,24 +57,24 @@ class UIManager:
                     category_dropdown = gr.Dropdown(
                         choices=list(PROMPT_EXAMPLES.keys()),
                         label="💡 Quick Prompt Category",
-                        value="Fashion / Beauty Portrait",
                         interactive=True
                     )
                     example_dropdown = gr.Dropdown(
-                        choices=PROMPT_EXAMPLES["Fashion / Beauty Portrait"],
                         label="Example Prompts (click to use)",
-                        value=None,  # Start with no selection to ensure first click works
                         interactive=True
                     )
                     # Quality tips banner (blue)
                     gr.HTML("""
                         <div class="quality-banner">
-                            <strong>💡 Quality Tips for Best Results:</strong><br>
-                            • <strong>Describe what's IN the image:</strong> For Example: If hands aren't visible, don't mention hand movements<br>
-                            • <strong>Use example prompts:</strong> They're tested and optimized for this model<br>
-                            • <strong>Keep motions simple:</strong> Focus on head turns, expressions, camera movements
                         </div>
                     """)
@@ -131,12 +137,14 @@ class UIManager:
                                 value=42,
                                 precision=0,
                                 minimum=0,
-                                maximum=2147483647
                             )
                             randomize_seed = gr.Checkbox(
                                 label="Randomize Seed",
-                                value=True
                             )
                         enable_ai_prompt = gr.Checkbox(
@@ -186,8 +194,10 @@ class UIManager:
             gr.HTML("""
                 <div class="footer">
                     <p style="font-size: 0.9rem;">
-                        <strong>Powered by:</strong>
-                        Wan2.2-I2V-A14B · Qwen2.5-0.5B · Lightning LoRA
                     </p>
                 </div>
             """)
@@ -198,10 +208,24 @@ class UIManager:
             def fill_prompt(selected_example):
                 return selected_example if selected_example else ""
             category_dropdown.change(fn=update_examples, inputs=[category_dropdown],
                                     outputs=[example_dropdown])
             example_dropdown.change(fn=fill_prompt, inputs=[example_dropdown],
                                    outputs=[prompt_input])
             generate_btn.click(
                 fn=self._handle_generation,

                         height=320
                     )
+                    resolution_info = gr.Markdown(
+                        value="",
+                        visible=False,
+                        elem_classes="info-text"
+                    )
                     prompt_input = gr.Textbox(
                         label="Motion Instruction",
                         placeholder="Describe camera movements (zoom, pan, orbit) and subject actions (head turn, hair flow, expression change). Be specific and cinematic! Example: 'Camera slowly zooms in, subject's eyes sparkle, hair flows gently in wind'",
                     category_dropdown = gr.Dropdown(
                         choices=list(PROMPT_EXAMPLES.keys()),
                         label="💡 Quick Prompt Category",
+                        value="💃 Fashion / Beauty (Facial Only)",
                         interactive=True
                     )
                     example_dropdown = gr.Dropdown(
+                        choices=PROMPT_EXAMPLES["💃 Fashion / Beauty (Facial Only)"],
                         label="Example Prompts (click to use)",
+                        value=None,
                         interactive=True
                     )
                     # Quality tips banner (blue)
                     gr.HTML("""
                         <div class="quality-banner">
+                            <strong>💡 Choose the Right Prompt Category:</strong><br>
+                            • <strong>💃 Facial Only:</strong> Safe for headshots and portraits without visible hands<br>
+                            • <strong>🙌 Hands Visible Required:</strong> Only use if hands are fully visible in your image (prevents artifacts)<br>
+                            • <strong>🌄 Scenery/Objects:</strong> For landscapes, products, and abstract content
                         </div>
                     """)
                                 value=42,
                                 precision=0,
                                 minimum=0,
+                                maximum=2147483647,
+                                info="Use same seed for reproducible results"
                             )
                             randomize_seed = gr.Checkbox(
                                 label="Randomize Seed",
+                                value=True,
+                                info="Generate different results each time"
                             )
                         enable_ai_prompt = gr.Checkbox(
             gr.HTML("""
                 <div class="footer">
                     <p style="font-size: 0.9rem;">
+                        <strong>Powered by:</strong><br>
+                        <a href="https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers" target="_blank" style="color: #6366f1; text-decoration: none;">Wan2.2-I2V-A14B</a> (Wan-AI, optimized by <a href="https://huggingface.co/cbensimon" target="_blank" style="color: #6366f1; text-decoration: none;">cbensimon</a>)
+                        · Lightning LoRA (<a href="https://huggingface.co/Kijai/WanVideo_comfy" target="_blank" style="color: #6366f1; text-decoration: none;">Lightx2v</a>)
+                        · <a href="https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct" target="_blank" style="color: #6366f1; text-decoration: none;">Qwen2.5-0.5B</a>
                     </p>
                 </div>
             """)
             def fill_prompt(selected_example):
                 return selected_example if selected_example else ""
+            def show_resolution_info(image):
+                if image is None:
+                    return "", False
+                from PIL import Image
+                original_w, original_h = image.size
+                resized_image = self.facade.video_engine.resize_image(image)
+                output_w, output_h = resized_image.width, resized_image.height
+                info = f"**📐 Resolution:** Input: {original_w}×{original_h} → Output: {output_w}×{output_h}"
+                return info, True
             category_dropdown.change(fn=update_examples, inputs=[category_dropdown],
                                     outputs=[example_dropdown])
             example_dropdown.change(fn=fill_prompt, inputs=[example_dropdown],
                                    outputs=[prompt_input])
+            image_input.change(fn=show_resolution_info, inputs=[image_input],
+                             outputs=[resolution_info, resolution_info])
             generate_btn.click(
                 fn=self._handle_generation,