prompt
Browse files
app.py
CHANGED
|
@@ -150,6 +150,7 @@ def infer(
|
|
| 150 |
guidance_scale,
|
| 151 |
num_inference_steps,
|
| 152 |
num_of_interpolation,
|
|
|
|
| 153 |
save_gpu_memory=True,
|
| 154 |
progress=gr.Progress(track_tqdm=True),
|
| 155 |
):
|
|
@@ -164,7 +165,10 @@ def infer(
|
|
| 164 |
prompt_dict = {"prompt_1": prompt1, "prompt_2": prompt2}
|
| 165 |
for key, value in prompt_dict.items():
|
| 166 |
assert value is not None, f"{key} must not be None."
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
# Get text embeddings and tokens.
|
| 170 |
_context, _token_mask, _token, _caption = get_caption(
|
|
@@ -181,10 +185,10 @@ def infer(
|
|
| 181 |
# Prepare the initial latent representations based on the number of interpolations.
|
| 182 |
if num_of_interpolation == 3:
|
| 183 |
# Addition or subtraction mode.
|
| 184 |
-
if
|
| 185 |
assert config.prompt_s is None, "Only one of prompt_a or prompt_s should be provided."
|
| 186 |
z_init_temp = _z_init[0] + _z_init[1]
|
| 187 |
-
elif
|
| 188 |
assert config.prompt_a is None, "Only one of prompt_a or prompt_s should be provided."
|
| 189 |
z_init_temp = _z_init[0] - _z_init[1]
|
| 190 |
else:
|
|
@@ -194,10 +198,7 @@ def infer(
|
|
| 194 |
_z_init[2] = (z_init_temp - mean) / std
|
| 195 |
|
| 196 |
elif num_of_interpolation == 4:
|
| 197 |
-
|
| 198 |
-
mean = z_init_temp.mean()
|
| 199 |
-
std = z_init_temp.std()
|
| 200 |
-
_z_init[3] = (z_init_temp - mean) / std
|
| 201 |
|
| 202 |
elif num_of_interpolation >= 5:
|
| 203 |
tensor_a = _z_init[0]
|
|
@@ -244,21 +245,25 @@ def infer(
|
|
| 244 |
to_pil = ToPILImage()
|
| 245 |
pil_images = [to_pil(img) for img in samples]
|
| 246 |
|
| 247 |
-
|
| 248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
|
| 261 |
-
|
| 262 |
# return first_image, last_image, seed
|
| 263 |
|
| 264 |
|
|
@@ -269,7 +274,7 @@ def infer(
|
|
| 269 |
# ]
|
| 270 |
|
| 271 |
def infer_tab1(prompt1, prompt2, seed, randomize_seed, guidance_scale, num_inference_steps, num_of_interpolation):
|
| 272 |
-
default_op = "
|
| 273 |
return infer(prompt1, prompt2, seed, randomize_seed, guidance_scale, num_inference_steps, num_of_interpolation, default_op)
|
| 274 |
|
| 275 |
# Wrapper for Tab 2: Uses operation_mode and fixes num_of_interpolation to 3.
|
|
@@ -281,6 +286,10 @@ examples_1 = [
|
|
| 281 |
["A robot cooking dinner in the kitchen", "An orange cat wearing sunglasses on a ship"],
|
| 282 |
]
|
| 283 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
css = """
|
| 285 |
#col-container {
|
| 286 |
margin: 0 auto;
|
|
@@ -464,9 +473,10 @@ with gr.Blocks(css=css) as demo:
|
|
| 464 |
prompt2_tab1 = gr.Text(placeholder="Prompt for second image", label="Prompt 2")
|
| 465 |
seed_tab1 = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, value=0, label="Seed")
|
| 466 |
randomize_seed_tab1 = gr.Checkbox(label="Randomize seed", value=True)
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
|
|
|
| 470 |
run_button_tab1 = gr.Button("Run")
|
| 471 |
|
| 472 |
first_image_output_tab1 = gr.Image(label="Image of the first prompt")
|
|
@@ -520,6 +530,8 @@ with gr.Blocks(css=css) as demo:
|
|
| 520 |
outputs=[first_image_output_tab2, last_image_output_tab2, gif_output_tab2, seed_tab2]
|
| 521 |
)
|
| 522 |
|
|
|
|
|
|
|
| 523 |
|
| 524 |
if __name__ == "__main__":
|
| 525 |
demo.launch()
|
|
|
|
| 150 |
guidance_scale,
|
| 151 |
num_inference_steps,
|
| 152 |
num_of_interpolation,
|
| 153 |
+
operation_mode,
|
| 154 |
save_gpu_memory=True,
|
| 155 |
progress=gr.Progress(track_tqdm=True),
|
| 156 |
):
|
|
|
|
| 165 |
prompt_dict = {"prompt_1": prompt1, "prompt_2": prompt2}
|
| 166 |
for key, value in prompt_dict.items():
|
| 167 |
assert value is not None, f"{key} must not be None."
|
| 168 |
+
if operation_mode != 'Interpolation':
|
| 169 |
+
assert num_of_interpolation >= 5, "For linear interpolation, please sample at least five images."
|
| 170 |
+
else:
|
| 171 |
+
assert num_of_interpolation == 3, "For arithmetic, please sample three images."
|
| 172 |
|
| 173 |
# Get text embeddings and tokens.
|
| 174 |
_context, _token_mask, _token, _caption = get_caption(
|
|
|
|
| 185 |
# Prepare the initial latent representations based on the number of interpolations.
|
| 186 |
if num_of_interpolation == 3:
|
| 187 |
# Addition or subtraction mode.
|
| 188 |
+
if operation_mode == 'Addition':
|
| 189 |
assert config.prompt_s is None, "Only one of prompt_a or prompt_s should be provided."
|
| 190 |
z_init_temp = _z_init[0] + _z_init[1]
|
| 191 |
+
elif operation_mode == 'Subtraction':
|
| 192 |
assert config.prompt_a is None, "Only one of prompt_a or prompt_s should be provided."
|
| 193 |
z_init_temp = _z_init[0] - _z_init[1]
|
| 194 |
else:
|
|
|
|
| 198 |
_z_init[2] = (z_init_temp - mean) / std
|
| 199 |
|
| 200 |
elif num_of_interpolation == 4:
|
| 201 |
+
raise ValueError("Unsupported number of interpolations.")
|
|
|
|
|
|
|
|
|
|
| 202 |
|
| 203 |
elif num_of_interpolation >= 5:
|
| 204 |
tensor_a = _z_init[0]
|
|
|
|
| 245 |
to_pil = ToPILImage()
|
| 246 |
pil_images = [to_pil(img) for img in samples]
|
| 247 |
|
| 248 |
+
if num_of_interpolation == 3:
|
| 249 |
+
return pil_images[0], pil_images[1], pil_images[2], seed
|
| 250 |
+
|
| 251 |
+
else:
|
| 252 |
+
first_image = pil_images[0]
|
| 253 |
+
last_image = pil_images[-1]
|
| 254 |
|
| 255 |
+
gif_buffer = io.BytesIO()
|
| 256 |
+
pil_images[0].save(gif_buffer, format="GIF", save_all=True, append_images=pil_images[1:], duration=200, loop=0)
|
| 257 |
+
gif_buffer.seek(0)
|
| 258 |
+
gif_bytes = gif_buffer.read()
|
| 259 |
|
| 260 |
+
# Save the GIF bytes to a temporary file and get its path
|
| 261 |
+
temp_gif = tempfile.NamedTemporaryFile(delete=False, suffix=".gif")
|
| 262 |
+
temp_gif.write(gif_bytes)
|
| 263 |
+
temp_gif.close()
|
| 264 |
+
gif_path = temp_gif.name
|
| 265 |
|
| 266 |
+
return first_image, last_image, gif_path, seed
|
| 267 |
# return first_image, last_image, seed
|
| 268 |
|
| 269 |
|
|
|
|
| 274 |
# ]
|
| 275 |
|
| 276 |
def infer_tab1(prompt1, prompt2, seed, randomize_seed, guidance_scale, num_inference_steps, num_of_interpolation):
|
| 277 |
+
default_op = "Interpolation"
|
| 278 |
return infer(prompt1, prompt2, seed, randomize_seed, guidance_scale, num_inference_steps, num_of_interpolation, default_op)
|
| 279 |
|
| 280 |
# Wrapper for Tab 2: Uses operation_mode and fixes num_of_interpolation to 3.
|
|
|
|
| 286 |
["A robot cooking dinner in the kitchen", "An orange cat wearing sunglasses on a ship"],
|
| 287 |
]
|
| 288 |
|
| 289 |
+
examples_2 = [
|
| 290 |
+
["A corgi in the park", "red hat"],
|
| 291 |
+
]
|
| 292 |
+
|
| 293 |
css = """
|
| 294 |
#col-container {
|
| 295 |
margin: 0 auto;
|
|
|
|
| 473 |
prompt2_tab1 = gr.Text(placeholder="Prompt for second image", label="Prompt 2")
|
| 474 |
seed_tab1 = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, value=0, label="Seed")
|
| 475 |
randomize_seed_tab1 = gr.Checkbox(label="Randomize seed", value=True)
|
| 476 |
+
with gr.Accordion("Advanced Settings", open=False):
|
| 477 |
+
guidance_scale_tab1 = gr.Slider(minimum=0.0, maximum=10.0, step=0.1, value=7.0, label="Guidance Scale")
|
| 478 |
+
num_inference_steps_tab1 = gr.Slider(minimum=1, maximum=50, step=1, value=25, label="Number of Inference Steps")
|
| 479 |
+
num_of_interpolation_tab1 = gr.Slider(minimum=5, maximum=50, step=1, value=10, label="Number of Images for Interpolation")
|
| 480 |
run_button_tab1 = gr.Button("Run")
|
| 481 |
|
| 482 |
first_image_output_tab1 = gr.Image(label="Image of the first prompt")
|
|
|
|
| 530 |
outputs=[first_image_output_tab2, last_image_output_tab2, gif_output_tab2, seed_tab2]
|
| 531 |
)
|
| 532 |
|
| 533 |
+
gr.Examples(examples=examples_2, inputs=[prompt1_tab2, prompt2_tab2])
|
| 534 |
+
|
| 535 |
|
| 536 |
if __name__ == "__main__":
|
| 537 |
demo.launch()
|