Spaces:

yhzhai
/

mcm

Runtime error

App Files Community

yhzhai committed on Jun 18, 2024

Commit

18eb2fe

1 Parent(s): 08f4103

add resolution

Browse files

Files changed (1) hide show

app.py +96 -25

app.py CHANGED Viewed

@@ -35,9 +35,7 @@ def get_modelscope_pipeline(
     #         model_id, torch_dtype=torch.float16, variant="fp16"
     #     )
     # else:
-    pipe = DiffusionPipeline.from_pretrained(
-        model_id
-    )
     scheduler = LCMScheduler.from_pretrained(
         model_id,
         subfolder="scheduler",
@@ -98,12 +96,10 @@ def get_animatediff_pipeline(
     #         torch_dtype=torch.float16,
     #     )
     # else:
-    adapter = MotionAdapter.from_pretrained(
-        motion_module_path
-    )
     pipe = AnimateDiffPipeline.from_pretrained(
         model_id,
-        motion_adapter=adapter,
     )
     scheduler = LCMScheduler.from_pretrained(
         model_id,
@@ -141,7 +137,13 @@ def get_animatediff_pipeline(
 pipe_dict = {
-    "ModelScope T2V": {"WebVid": None, "LAION-aes": None, "Anime": None, "Realistic": None, "3D Cartoon": None},
     "AnimateDiff (SD1.5)": {"WebVid": None, "LAION-aes": None},
     "AnimateDiff (RealisticVision)": {"WebVid": None, "LAION-aes": None},
     "AnimateDiff (epiCRealism)": {"WebVid": None, "LAION-aes": None},
@@ -179,9 +181,17 @@ cache_pipeline = {
 #                 else:
 #                     raise ValueError(f"Unknown base_model {base_model}")
-@spaces.GPU(duration=120)
 def infer(
-    base_model, variant, prompt, num_inference_steps=4, seed=0, randomize_seed=True,
 ):
     # if pipe_dict[base_model][variant] is None:
     #     if base_model == "ModelScope T2V":
@@ -245,12 +255,14 @@ def infer(
     generator = torch.Generator("cpu").manual_seed(seed)
-    # progress=gr.Progress(track_tqdm=True)
     output = cache_pipeline["pipeline"](
         prompt=prompt,
         num_frames=16,
         guidance_scale=1.0,
         num_inference_steps=num_inference_steps,
         generator=generator,
     ).frames
     if not isinstance(output, list):
@@ -275,50 +287,69 @@ examples = [
         "ModelScope T2V",
         "LAION-aes",
         "Aerial uhd 4k view. mid-air flight over fresh and clean mountain river at sunny summer morning. Green trees and sun rays on horizon. Direct on sun.",
-        4
     ],
-    ["ModelScope T2V", "Anime", "Timelapse misty mountain landscape", 4],
     [
         "ModelScope T2V",
         "WebVid",
         "Back of woman in shorts going near pure creek in beautiful mountains.",
-        4
     ],
     [
         "ModelScope T2V",
         "3D Cartoon",
         "A rotating pandoro (a traditional italian sweet yeast bread, most popular around christmas and new year) being eaten in time-lapse.",
-        4
     ],
     [
         "ModelScope T2V",
         "Realistic",
         "Slow motion avocado with a stone falls and breaks into 2 parts with splashes",
-        4
     ],
     [
         "AnimateDiff (RealisticVision)",
         "LAION-aes",
         "Slow motion of delicious salmon sachimi set with green vegetables leaves served on wood plate. make homemade japanese food at home.-dan",
-        8
     ],
     [
         "AnimateDiff (RealisticVision)",
         "WebVid",
         "Blooming meadow panorama zoom-out shot heavenly clouds and upcoming thunderstorm in mountain range harz, germany.",
-        8
     ],
     [
         "AnimateDiff (RealisticVision)",
         "LAION-aes",
         "A young woman in a yellow sweater uses vr glasses, sitting on the shore of a pond on a background of dark waves. a strong wind develops her hair, the sun's rays are reflected from the water.",
-        8
     ],
     [
         "AnimateDiff (RealisticVision)",
         "LAION-aes",
         "Female running at sunset. healthy fitness concept",
-        8
     ],
 ]
@@ -339,6 +370,7 @@ variants = {
 def update_variant(rs):
     return gr.update(choices=variants[rs], value=None)
 # init_pipelines()
 with gr.Blocks(css=css) as demo:
@@ -362,9 +394,12 @@ with gr.Blocks(css=css) as demo:
         gr.Markdown(
             f"""
-        <p align="center"> Currently running on {device}.</p>
         """
         )
         with gr.Row():
             base_model = gr.Dropdown(
                 label="Base model",
@@ -420,16 +455,50 @@ with gr.Blocks(css=css) as demo:
                             step=1,
                             value=4,
                         )
-            with gr.Column():
                 # result = gr.Video(label="Result", show_label=False, interactive=False, height=512, width=512, autoplay=True)
                 result = gr.Video(
-                    label="Result", show_label=False, interactive=False, autoplay=True, height=512, width=512,
                 )
         gr.Examples(
             examples=examples,
-            inputs=[base_model, variant_dropdown, prompt, num_inference_steps],
             cache_examples=True,
             fn=infer,
             outputs=[result, seed],
@@ -442,6 +511,8 @@ with gr.Blocks(css=css) as demo:
             variant_dropdown,
             prompt,
             num_inference_steps,
             seed,
             randomize_seed,
         ],

     #         model_id, torch_dtype=torch.float16, variant="fp16"
     #     )
     # else:
+    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16")
     scheduler = LCMScheduler.from_pretrained(
         model_id,
         subfolder="scheduler",
     #         torch_dtype=torch.float16,
     #     )
     # else:
+    adapter = MotionAdapter.from_pretrained(motion_module_path)
     pipe = AnimateDiffPipeline.from_pretrained(
         model_id,
+        motion_adapter=adapter, torch_dtype=torch.float16
     )
     scheduler = LCMScheduler.from_pretrained(
         model_id,
 pipe_dict = {
+    "ModelScope T2V": {
+        "WebVid": None,
+        "LAION-aes": None,
+        "Anime": None,
+        "Realistic": None,
+        "3D Cartoon": None,
+    },
     "AnimateDiff (SD1.5)": {"WebVid": None, "LAION-aes": None},
     "AnimateDiff (RealisticVision)": {"WebVid": None, "LAION-aes": None},
     "AnimateDiff (epiCRealism)": {"WebVid": None, "LAION-aes": None},
 #                 else:
 #                     raise ValueError(f"Unknown base_model {base_model}")
+@spaces.GPU(duration=90)
 def infer(
+    base_model,
+    variant,
+    prompt,
+    num_inference_steps=4,
+    height=256,
+    width=256,
+    seed=0,
+    randomize_seed=True,
 ):
     # if pipe_dict[base_model][variant] is None:
     #     if base_model == "ModelScope T2V":
     generator = torch.Generator("cpu").manual_seed(seed)
+    progress = gr.Progress(track_tqdm=True)
     output = cache_pipeline["pipeline"](
         prompt=prompt,
         num_frames=16,
         guidance_scale=1.0,
         num_inference_steps=num_inference_steps,
+        height=height,
+        width=width,
         generator=generator,
     ).frames
     if not isinstance(output, list):
         "ModelScope T2V",
         "LAION-aes",
         "Aerial uhd 4k view. mid-air flight over fresh and clean mountain river at sunny summer morning. Green trees and sun rays on horizon. Direct on sun.",
+        4,
+        256,
+        256,
     ],
+    ["ModelScope T2V", "Anime", "Timelapse misty mountain landscape", 4,
+        256,
+        256,
+     ],
     [
         "ModelScope T2V",
         "WebVid",
         "Back of woman in shorts going near pure creek in beautiful mountains.",
+        4,
+        256,
+        256,
     ],
     [
         "ModelScope T2V",
         "3D Cartoon",
         "A rotating pandoro (a traditional italian sweet yeast bread, most popular around christmas and new year) being eaten in time-lapse.",
+        4,
+        256,
+        256,
     ],
     [
         "ModelScope T2V",
         "Realistic",
         "Slow motion avocado with a stone falls and breaks into 2 parts with splashes",
+        4,
+        256,
+        256,
     ],
     [
         "AnimateDiff (RealisticVision)",
         "LAION-aes",
         "Slow motion of delicious salmon sachimi set with green vegetables leaves served on wood plate. make homemade japanese food at home.-dan",
+        8,
+        512,
+        512,
     ],
     [
         "AnimateDiff (RealisticVision)",
         "WebVid",
         "Blooming meadow panorama zoom-out shot heavenly clouds and upcoming thunderstorm in mountain range harz, germany.",
+        8,
+        512,
+        512,
     ],
     [
         "AnimateDiff (RealisticVision)",
         "LAION-aes",
         "A young woman in a yellow sweater uses vr glasses, sitting on the shore of a pond on a background of dark waves. a strong wind develops her hair, the sun's rays are reflected from the water.",
+        8,
+        512,
+        512,
     ],
     [
         "AnimateDiff (RealisticVision)",
         "LAION-aes",
         "Female running at sunset. healthy fitness concept",
+        8,
+        512,
+        512,
     ],
 ]
 def update_variant(rs):
     return gr.update(choices=variants[rs], value=None)
 # init_pipelines()
 with gr.Blocks(css=css) as demo:
         gr.Markdown(
             f"""
+        <p align="center">Currently running on {device}.</p>
+        <p align="center">Model loading takes extra time.</p>
         """
         )
+        # <p align="center">ModelScope T2V works the best for resolution 256x256, and AnimateDiff works the best for 512x512.</p>
         with gr.Row():
             base_model = gr.Dropdown(
                 label="Base model",
                             step=1,
                             value=4,
                         )
+                    with gr.Group():
+                        with gr.Row():
+                            text_hint = gr.Textbox(
+                                "Hint: ModelScope T2V works the best for resolution 256x256, and AnimateDiff works the best for resolution 512x512.",
+                                interactive=False,
+                                label="Hint",
+                                container=False,
+                            )
+                        with gr.Row():
+                            height = gr.Slider(
+                                label="Height",
+                                minimum=256,
+                                maximum=1024,
+                                step=64,
+                                value=512,
+                                interactive=True,
+                            )
+                            width = gr.Slider(
+                                label="Width",
+                                minimum=256,
+                                maximum=1024,
+                                step=64,
+                                value=512,
+                                interactive=True,
+                            )
+            with gr.Column(show_progress=True):
                 # result = gr.Video(label="Result", show_label=False, interactive=False, height=512, width=512, autoplay=True)
                 result = gr.Video(
+                    label="Result",
+                    show_label=False,
+                    interactive=False,
+                    autoplay=True,
+                    # height=512,
+                    # width=512,
                 )
         gr.Examples(
             examples=examples,
+            inputs=[base_model, variant_dropdown, prompt, num_inference_steps, height, width],
             cache_examples=True,
             fn=infer,
             outputs=[result, seed],
             variant_dropdown,
             prompt,
             num_inference_steps,
+            height,
+            width,
             seed,
             randomize_seed,
         ],