Spaces:

Yuanshi
/

ViBT

Running on Zero

App Files Files Community

Yuanshi committed 18 days ago

Commit

6359789

1 Parent(s): ab20f51

update

Browse files

Files changed (1) hide show

app.py +97 -41

app.py CHANGED Viewed

@@ -5,6 +5,32 @@ import random
 import gradio as gr
 import spaces
 @dataclass(frozen=True)
 class SliderConfig:
@@ -29,47 +55,55 @@ GAMMA_SLIDER = SliderConfig(
     maximum=10.0,
     step=0.5,
     value=5.0,
-    info="Scheduler adjustment parameter."
 )
 STEP_SLIDER = SliderConfig(
     label="Inference Steps",
-    minimum=10,
-    maximum=50,
     step=1,
-    value=28,
-    info="More steps improve quality but take longer."
 )
 GUIDANCE_SLIDER = SliderConfig(
     label="Guidance Scale (CFG)",
     minimum=1.0,
-    maximum=20.0,
     step=0.5,
-    value=1.5,
-    info="Controls adherence to the text prompt."
 )
 STYLE_CHOICES = [
-    "Oil painting style, vivid colors",
-    "Neon cyberpunk, futuristic city",
-    "Minimalist sketch, soft shading",
-    "Anime aesthetic, bold lines",
 ]
 EXAMPLE_INPUTS = [
-    ["assets/video_00000000.mp4", "Oil painting style, vivid colors"],
-    ["assets/video_00000007.mp4", "Neon cyberpunk, futuristic city"],
-    ["assets/video_00000107.mp4", "Minimalist sketch, soft shading"],
 ]
 PRESET_MODES = {
-    "Fast": PresetConfig(shift_gamma=5.0, steps=10, guidance_scale=1.),
-    "Balanced": PresetConfig(shift_gamma=5.0, steps=10, guidance_scale=1.5),
-    "Quality": PresetConfig(shift_gamma=5.0, steps=20, guidance_scale=1.5),
 }
@@ -112,7 +146,30 @@ def run_stylization(
         f"Seed={resolved_seed}"
     )
-    return input_video_path
 def _resolve_seed(seed_value, randomize):
@@ -175,26 +232,31 @@ def build_demo() -> gr.Blocks:
             with gr.Row():
                 with gr.Column(scale=1) as control_col:
                     with gr.Tabs():
                         with gr.Tab("Quick Generate"):
-                            prompt_quick = gr.Dropdown(
-                                label="Style Instruction",
-                                choices=STYLE_CHOICES,
-                                value=STYLE_CHOICES[0],
-                                allow_custom_value=False,
-                            )
                             with gr.Row():
-                                fast_btn = gr.Button("⚡ Fast Generate", variant="primary")
-                                balanced_btn = gr.Button("🎯 Balanced Generate", variant="primary")
-                                quality_btn = gr.Button("🌟 Quality Generate", variant="primary")
                             _bind_preset_button(
                                 button=fast_btn,
                                 preset_key="Fast",
                                 inputs=[
                                     input_video,
-                                    prompt_quick,
                                 ],
                                 output=output_video,
                                 extra_kwargs={"seed": None, "randomize_seed": True},
@@ -204,7 +266,7 @@ def build_demo() -> gr.Blocks:
                                 preset_key="Balanced",
                                 inputs=[
                                     input_video,
-                                    prompt_quick,
                                 ],
                                 output=output_video,
                                 extra_kwargs={"seed": None, "randomize_seed": True},
@@ -214,19 +276,13 @@ def build_demo() -> gr.Blocks:
                                 preset_key="Quality",
                                 inputs=[
                                     input_video,
-                                    prompt_quick,
                                 ],
                                 output=output_video,
                                 extra_kwargs={"seed": None, "randomize_seed": True},
                             )
                         with gr.Tab("Advanced Settings"):
-                            prompt_adv = gr.Dropdown(
-                                label="Style Instruction",
-                                choices=STYLE_CHOICES,
-                                value=STYLE_CHOICES[0],
-                                allow_custom_value=True,
-                            )
                             with gr.Row():
                                 shift_gamma = _create_slider(GAMMA_SLIDER)
                                 guidance_scale = _create_slider(GUIDANCE_SLIDER)
@@ -235,7 +291,7 @@ def build_demo() -> gr.Blocks:
                                 num_steps = _create_slider(STEP_SLIDER)
                                 randomize_seed_adv = gr.Checkbox(
                                     label="Randomize Seed",
-                                    value=False,
                                     info="Checked = new random seed each run. Uncheck to provide your own seed.",
                                 )
@@ -251,7 +307,7 @@ def build_demo() -> gr.Blocks:
                                 fn=run_stylization,
                                 inputs=[
                                     input_video,
-                                    prompt_adv,
                                     shift_gamma,
                                     num_steps,
                                     guidance_scale,
@@ -264,7 +320,7 @@ def build_demo() -> gr.Blocks:
                 with gr.Column(scale=1):
                     gr.Examples(
                         examples=EXAMPLE_INPUTS,
-                        inputs=[input_video, prompt_quick, prompt_adv],
                         label="Example inputs",
                     )

 import gradio as gr
 import spaces
+import torch
+from diffusers import WanPipeline
+from diffusers.utils import export_to_video, load_video
+from vibt.wan import load_vibt_weight, encode_video
+from vibt.scheduler import ViBTScheduler
+import tempfile
+import os
+import cv2
+def get_fps(path):
+    cap = cv2.VideoCapture(path)
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    cap.release()
+    return fps
+base_model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
+pipe = WanPipeline.from_pretrained(base_model_id, torch_dtype=torch.bfloat16).to("cuda")
+load_vibt_weight(
+    pipe.transformer,
+    "Yuanshi/ViBT",
+    "video/video_stylization.safetensors",
+)
+pipe.scheduler = ViBTScheduler.from_scheduler(pipe.scheduler)
 @dataclass(frozen=True)
 class SliderConfig:
     maximum=10.0,
     step=0.5,
     value=5.0,
+    info="Scheduler adjustment parameter.",
 )
 STEP_SLIDER = SliderConfig(
     label="Inference Steps",
+    minimum=6,
+    maximum=28,
     step=1,
+    value=10,
+    info="More steps improve quality but take longer.",
 )
 GUIDANCE_SLIDER = SliderConfig(
     label="Guidance Scale (CFG)",
     minimum=1.0,
+    maximum=5.0,
     step=0.5,
+    value=2,
+    info="Controls adherence to the text prompt.",
 )
 STYLE_CHOICES = [
+    "Make it Illustration style.",
+    "Make it a drawing by Van Gogh.",
+    "Make it a pencil sketch style.",
+    "Make it watercolor drawing style.",
+    "Make it a Pixel Art.",
+    "Make it a Japanese anime style, cel shading.",
+    "Make it the style of Neon Light Art.",
+    "Make it papercut style.",
+    "Make it a blueprint.",
+    "Make it Comic Book Style.",
+    "Render the subject as a classical sculpture carved from a single block of pristine white marble.",
 ]
 EXAMPLE_INPUTS = [
+    ["assets/video_00000000.mp4", STYLE_CHOICES[0]],
+    ["assets/video_00000007.mp4", STYLE_CHOICES[1]],
+    ["assets/video_00000019.mp4", STYLE_CHOICES[2]],
+    ["assets/video_00000071.mp4", STYLE_CHOICES[3]],
 ]
 PRESET_MODES = {
+    "Fast": PresetConfig(shift_gamma=5.0, steps=6, guidance_scale=2),
+    "Balanced": PresetConfig(shift_gamma=5.0, steps=10, guidance_scale=2),
+    "Quality": PresetConfig(shift_gamma=5.0, steps=20, guidance_scale=2),
 }
         f"Seed={resolved_seed}"
     )
+    source_video = load_video(input_video_path)
+    source_video = [each.resize((832, 480)) for each in source_video][:81]
+    if len(source_video) < 81:
+        source_video += [source_video[-1]] * (81 - len(source_video))
+    source_fps = get_fps(input_video_path)
+    source_latents = encode_video(pipe, source_video)
+    pipe.scheduler.set_parameters(
+        noise_scale=1.0, shift_gamma=shift_gamma, seed=resolved_seed
+    )
+    output = pipe(
+        prompt=prompt,
+        num_inference_steps=steps,
+        guidance_scale=guidance_scale,
+        latents=source_latents,
+    ).frames[0]
+    tmp_dir = tempfile.mkdtemp()
+    out_path = os.path.join(tmp_dir, f"{random.randint(0, 2**31 - 1)}.mp4")
+    export_to_video(output, out_path, fps=source_fps)
+    print(out_path)
+    return out_path
 def _resolve_seed(seed_value, randomize):
             with gr.Row():
                 with gr.Column(scale=1) as control_col:
+                    prompt = gr.Dropdown(
+                        label="Style Instruction",
+                        choices=STYLE_CHOICES,
+                        value=STYLE_CHOICES[0],
+                        allow_custom_value=True,
+                    )
                     with gr.Tabs():
                         with gr.Tab("Quick Generate"):
                             with gr.Row():
+                                fast_btn = gr.Button(
+                                    "⚡ Fast Generate", variant="primary"
+                                )
+                                balanced_btn = gr.Button(
+                                    "🎯 Balanced Generate", variant="primary"
+                                )
+                                quality_btn = gr.Button(
+                                    "🌟 Quality Generate", variant="primary"
+                                )
                             _bind_preset_button(
                                 button=fast_btn,
                                 preset_key="Fast",
                                 inputs=[
                                     input_video,
+                                    prompt,
                                 ],
                                 output=output_video,
                                 extra_kwargs={"seed": None, "randomize_seed": True},
                                 preset_key="Balanced",
                                 inputs=[
                                     input_video,
+                                    prompt,
                                 ],
                                 output=output_video,
                                 extra_kwargs={"seed": None, "randomize_seed": True},
                                 preset_key="Quality",
                                 inputs=[
                                     input_video,
+                                    prompt,
                                 ],
                                 output=output_video,
                                 extra_kwargs={"seed": None, "randomize_seed": True},
                             )
                         with gr.Tab("Advanced Settings"):
                             with gr.Row():
                                 shift_gamma = _create_slider(GAMMA_SLIDER)
                                 guidance_scale = _create_slider(GUIDANCE_SLIDER)
                                 num_steps = _create_slider(STEP_SLIDER)
                                 randomize_seed_adv = gr.Checkbox(
                                     label="Randomize Seed",
+                                    value=True,
                                     info="Checked = new random seed each run. Uncheck to provide your own seed.",
                                 )
                                 fn=run_stylization,
                                 inputs=[
                                     input_video,
+                                    prompt,
                                     shift_gamma,
                                     num_steps,
                                     guidance_scale,
                 with gr.Column(scale=1):
                     gr.Examples(
                         examples=EXAMPLE_INPUTS,
+                        inputs=[input_video, prompt],
                         label="Example inputs",
                     )