Hpsoyl committed on
Commit
fd7a328
·
1 Parent(s): 3fe0e76
Files changed (2) hide show
  1. app.py +13 -3
  2. models/pipeline_ddpm_text_encoder.py +22 -10
app.py CHANGED
@@ -406,7 +406,7 @@ def get_gallery_selection(evt: gr.SelectData):
406
 
407
  # --- Generation Functions ---
408
  @spaces.GPU(duration=120)
409
- def generate_t2i(prompt, num_inference_steps, num_images, current_color):
410
  """
411
  Generates multiple images for Text-to-Image and returns a gallery.
412
  """
@@ -415,7 +415,7 @@ def generate_t2i(prompt, num_inference_steps, num_images, current_color):
415
  target_model_path = PROMPT_TO_MODEL_MAP.get(prompt, f"{MODELS_ROOT_DIR}/UNET_T2I_CONTROLNET/FULL-checkpoint-275000")
416
  t2i_pipe = swap_t2i_unet(t2i_pipe, target_model_path)
417
 
418
- print(f"\n🚀 T2I Task started... | Prompt: '{prompt}' | Count: {num_images}")
419
 
420
  generated_raw_list = []
421
  generated_display_images = []
@@ -425,7 +425,14 @@ def generate_t2i(prompt, num_inference_steps, num_images, current_color):
425
  # Generate Batch
426
  for i in range(int(num_images)):
427
  # Generate single image
428
- image_np = t2i_pipe(prompt.lower(), generator=None, num_inference_steps=int(num_inference_steps), output_type="np").images
 
 
 
 
 
 
 
429
  generated_raw_list.append(image_np)
430
 
431
  # Save raw to temp
@@ -652,6 +659,9 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
652
  t2i_steps = gr.Slider(10, 200, 50, step=1, label="Inference Steps")
653
  # Added: Number of Images Slider
654
  t2i_num_images = gr.Slider(1, 9, 3, step=1, label="Number of Images")
 
 
 
655
  t2i_btn = gr.Button("Generate", variant="primary")
656
  with gr.Column(scale=2):
657
  # Changed: Image to Gallery
 
406
 
407
  # --- Generation Functions ---
408
  @spaces.GPU(duration=120)
409
+ def generate_t2i(prompt, num_inference_steps, num_images, current_color, height, width):
410
  """
411
  Generates multiple images for Text-to-Image and returns a gallery.
412
  """
 
415
  target_model_path = PROMPT_TO_MODEL_MAP.get(prompt, f"{MODELS_ROOT_DIR}/UNET_T2I_CONTROLNET/FULL-checkpoint-275000")
416
  t2i_pipe = swap_t2i_unet(t2i_pipe, target_model_path)
417
 
418
+ print(f"\n🚀 T2I Task started... | Prompt: '{prompt}' | Count: {num_images} | Size: {height}x{width}")
419
 
420
  generated_raw_list = []
421
  generated_display_images = []
 
425
  # Generate Batch
426
  for i in range(int(num_images)):
427
  # Generate single image
428
+ image_np = t2i_pipe(
429
+ prompt.lower(),
430
+ generator=None,
431
+ num_inference_steps=int(num_inference_steps),
432
+ output_type="np",
433
+ height=int(height),
434
+ width=int(width)
435
+ ).images
436
  generated_raw_list.append(image_np)
437
 
438
  # Save raw to temp
 
659
  t2i_steps = gr.Slider(10, 200, 50, step=1, label="Inference Steps")
660
  # Added: Number of Images Slider
661
  t2i_num_images = gr.Slider(1, 9, 3, step=1, label="Number of Images")
662
+ with gr.Row():
663
+ t2i_height = gr.Slider(256, 1024, value=512, step=64, label="Height")
664
+ t2i_width = gr.Slider(256, 1024, value=512, step=64, label="Width")
665
  t2i_btn = gr.Button("Generate", variant="primary")
666
  with gr.Column(scale=2):
667
  # Changed: Image to Gallery
models/pipeline_ddpm_text_encoder.py CHANGED
@@ -64,6 +64,8 @@ class DDPMPipeline(DiffusionPipeline):
64
  num_inference_steps: int = 1000,
65
  output_type: Optional[str] = "pil",
66
  return_dict: bool = True,
 
 
67
  ) -> Union[ImagePipelineOutput, Tuple]:
68
  r"""
69
  The call function to the pipeline for generation.
@@ -117,17 +119,27 @@ class DDPMPipeline(DiffusionPipeline):
117
  )
118
  text_input_ids = text_inputs.input_ids.to(self.device)
119
  encoder_hidden_states = self.text_encoder(text_input_ids, return_dict=False)[0]
120
-
 
 
 
 
 
 
 
 
 
 
121
  # Sample gaussian noise to begin loop
122
- if isinstance(self.unet.config.sample_size, int):
123
- image_shape = (
124
- batch_size,
125
- self.unet.config.in_channels,
126
- self.unet.config.sample_size,
127
- self.unet.config.sample_size,
128
- )
129
- else:
130
- image_shape = (batch_size, self.unet.config.in_channels, *self.unet.config.sample_size)
131
 
132
  if self.device.type == "mps":
133
  # randn does not work reproducibly on mps
 
64
  num_inference_steps: int = 1000,
65
  output_type: Optional[str] = "pil",
66
  return_dict: bool = True,
67
+ height: Optional[int] = None, # <--- new parameter
68
+ width: Optional[int] = None,
69
  ) -> Union[ImagePipelineOutput, Tuple]:
70
  r"""
71
  The call function to the pipeline for generation.
 
119
  )
120
  text_input_ids = text_inputs.input_ids.to(self.device)
121
  encoder_hidden_states = self.text_encoder(text_input_ids, return_dict=False)[0]
122
+
123
+ if height is None:
124
+ height = self.unet.config.sample_size
125
+ if width is None:
126
+ width = self.unet.config.sample_size
127
+ image_shape = (
128
+ batch_size,
129
+ self.unet.config.in_channels,
130
+ height,
131
+ width,
132
+ )
133
  # Sample gaussian noise to begin loop
134
+ # if isinstance(self.unet.config.sample_size, int):
135
+ # image_shape = (
136
+ # batch_size,
137
+ # self.unet.config.in_channels,
138
+ # self.unet.config.sample_size,
139
+ # self.unet.config.sample_size,
140
+ # )
141
+ # else:
142
+ # image_shape = (batch_size, self.unet.config.in_channels, *self.unet.config.sample_size)
143
 
144
  if self.device.type == "mps":
145
  # randn does not work reproducibly on mps