Ngene787 committed
Commit ae53881 · 1 Parent(s): 6c4a7e0

feat: add unconditional diffusion model and class guidance model

app.py CHANGED
@@ -7,7 +7,9 @@
 """
 import gradio as gr
 
-from stable_diffusion_inference import MAX_SEED
+from unconditional_diffusion_inference import inference_unconditional
+from class_guidance_inference import inference_class_guidance, GENDER_CHOICES
+from stable_diffusion_inference import inference_sd, MAX_SEED
 from inference_api import inference
 from utils import timer
 
@@ -87,7 +89,73 @@ with gr.Blocks(theme=theme, css=css) as demo:
     """)
 
     gr.Markdown("---")
-    gr.Markdown("## Part 1. Text-to-Image Generation")
+    gr.Markdown("## Part 1. Unconditional Face Generation")
+    with gr.Row():
+        run_button_1 = gr.Button("Run", scale=0, variant="primary")
+
+    result_1 = gr.Image(label="Result", show_label=False)
+
+    with gr.Accordion("Advanced Settings", open=False):
+        seed_1 = gr.Slider(
+            label="Seed",
+            minimum=0,
+            maximum=MAX_SEED,
+            step=1,
+            value=0,
+        )
+
+        randomize_seed_1 = gr.Checkbox(label="Randomize seed", value=False)
+
+        with gr.Row():
+            num_inference_steps_1 = gr.Slider(
+                label="Number of inference steps",
+                minimum=1,
+                maximum=100,
+                step=1,
+                value=50,
+            )
+
+    # gr.Examples(examples=[], inputs=[seed_1], outputs=[result_1, seed_1], fn=inference_unconditional,
+    #             cache_examples=True, cache_mode="lazy")
+
+    gr.Markdown("---")
+    gr.Markdown("## Part 2. Class Guidance Face Generation")
+    with gr.Row():
+        gender_select_radio = gr.Radio(
+            label="Select Gender",
+            choices=GENDER_CHOICES,
+            value=GENDER_CHOICES[0]
+        )
+        run_button_2 = gr.Button("Run", scale=0, variant="primary")
+
+    result_2 = gr.Image(label="Result", show_label=False)
+
+    with gr.Accordion("Advanced Settings", open=False):
+        seed_2 = gr.Slider(
+            label="Seed",
+            minimum=0,
+            maximum=MAX_SEED,
+            step=1,
+            value=0,
+        )
+
+        randomize_seed_2 = gr.Checkbox(label="Randomize seed", value=False)
+
+        with gr.Row():
+            num_inference_steps_2 = gr.Slider(
+                label="Number of inference steps",
+                minimum=1,
+                maximum=100,
+                step=1,
+                value=50,
+            )
+
+    # gr.Examples(examples=[], inputs=[gender_select_radio], outputs=[result_2, seed_2],
+    #             fn=inference_class_guidance,
+    #             cache_examples=True, cache_mode="lazy")
+
+    gr.Markdown("---")
+    gr.Markdown("## Part 3. Text-to-Face Generation")
     with gr.Row():
         prompt = gr.Text(
             label="Prompt",
@@ -97,9 +165,9 @@ with gr.Blocks(theme=theme, css=css) as demo:
             container=False,
         )
 
-        run_button = gr.Button("Run", scale=0, variant="primary")
+        run_button_3 = gr.Button("Run", scale=0, variant="primary")
 
-    result = gr.Image(label="Result", show_label=False)
+    result_3 = gr.Image(label="Result", show_label=False)
 
     with gr.Accordion("Advanced Settings", open=False):
         negative_prompt = gr.Text(
@@ -108,7 +176,7 @@ with gr.Blocks(theme=theme, css=css) as demo:
             placeholder="Enter a negative prompt",
        )
 
-        seed = gr.Slider(
+        seed_3 = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=MAX_SEED,
@@ -116,7 +184,7 @@ with gr.Blocks(theme=theme, css=css) as demo:
            value=0,
        )
 
-        randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
+        randomize_seed_3 = gr.Checkbox(label="Randomize seed", value=False)
 
        # with gr.Row():
        #     width = gr.Slider(
@@ -144,7 +212,7 @@ with gr.Blocks(theme=theme, css=css) as demo:
            value=7.5,
        )
 
-        num_inference_steps = gr.Slider(
+        num_inference_steps_3 = gr.Slider(
            label="Number of inference steps",
            minimum=1,
            maximum=100,
@@ -152,20 +220,43 @@ with gr.Blocks(theme=theme, css=css) as demo:
            value=50,
        )
 
-    gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=inference,
+    gr.Examples(examples=examples, inputs=[prompt], outputs=[result_3, seed_3], fn=inference_sd,
                 cache_examples=True, cache_mode="lazy")
+
+    gr.on(
+        triggers=[run_button_1.click],
+        fn=inference_unconditional,
+        inputs=[
+            randomize_seed_1,
+            num_inference_steps_1,
+        ],
+        outputs=[result_1, seed_1],
+    )
+
+    gr.on(
+        triggers=[run_button_2.click],
+        fn=inference_class_guidance,
+        inputs=[
+            gender_select_radio,
+            seed_2,
+            randomize_seed_2,
+            num_inference_steps_2,
+        ],
+        outputs=[result_2, seed_2],
+    )
+
     gr.on(
-        triggers=[run_button.click, prompt.submit],
-        fn=inference,
+        triggers=[run_button_3.click, prompt.submit],
+        fn=inference_sd,
        inputs=[
            prompt,
            negative_prompt,
-            seed,
-            randomize_seed,
+            seed_3,
+            randomize_seed_3,
            guidance_scale,
-            num_inference_steps,
+            num_inference_steps_3,
        ],
-        outputs=[result, seed],
+        outputs=[result_3, seed_3],
    )
 
 if __name__ == "__main__":
ccddpm_pipeline.py ADDED
@@ -0,0 +1,81 @@
+from typing import List, Optional, Union, Tuple
+import torch
+from diffusers import DDPMPipeline, ImagePipelineOutput, UNet2DConditionModel
+from diffusers.utils.torch_utils import randn_tensor
+
+
+class CCDDPMPipeline(DDPMPipeline):
+    def __init__(self, unet, scheduler):
+        if not isinstance(unet, UNet2DConditionModel):
+            raise ValueError(
+                "CCDDPMPipeline requires a UNet2DConditionModel for class conditioning."
+            )
+        super().__init__(unet, scheduler)
+
+    # Overwrite the __call__ method to accept class labels and encoder hidden states.
+    @torch.no_grad()
+    def __call__(
+        self,
+        batch_size: int = 1,
+        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+        num_inference_steps: int = 1000,
+        output_type: Optional[str] = "pil",
+        return_dict: bool = True,
+        *,
+        class_labels: torch.LongTensor,
+        encoder_hidden_states: torch.FloatTensor,
+    ) -> Union[ImagePipelineOutput, Tuple]:
+        # Determine shape for initial noise
+        if isinstance(self.unet.config.sample_size, int):
+            image_shape = (
+                batch_size,
+                self.unet.config.in_channels,
+                self.unet.config.sample_size,
+                self.unet.config.sample_size,
+            )
+        else:
+            image_shape = (
+                batch_size,
+                self.unet.config.in_channels,
+                *self.unet.config.sample_size,
+            )
+
+        # Sample gaussian noise to begin loop
+        if self.device.type == "mps":
+            image = randn_tensor(
+                image_shape, generator=generator, dtype=self.unet.dtype
+            )
+            image = image.to(self.device)
+        else:
+            image = randn_tensor(
+                image_shape,
+                generator=generator,
+                device=self.device,
+                dtype=self.unet.dtype,
+            )
+
+        # set step values
+        self.scheduler.set_timesteps(num_inference_steps)
+
+        # Denoising loop
+        for t in self.progress_bar(self.scheduler.timesteps):
+            model_output = self.unet(
+                image,
+                t,
+                encoder_hidden_states=encoder_hidden_states,
+                class_labels=class_labels,
+            ).sample
+            image = self.scheduler.step(
+                model_output, t, image, generator=generator
+            ).prev_sample
+
+        # Post-process to image
+        image = (image / 2 + 0.5).clamp(0, 1)
+        image = image.cpu().permute(0, 2, 3, 1).numpy()
+        if output_type == "pil":
+            image = self.numpy_to_pil(image)
+
+        if not return_dict:
+            return (image,)
+
+        return ImagePipelineOutput(images=image)
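
For context, a minimal sketch of how this pipeline is meant to be driven, mirroring the call added in class_guidance_inference.py below; the checkpoint name, label mapping and step count are simply the values used elsewhere in this commit, not new API:

    import torch
    from ccddpm_pipeline import CCDDPMPipeline

    pipe = CCDDPMPipeline.from_pretrained("Ngene787/Faice_class_guidance")
    # 0 = Female, 1 = Male, matching GENDER_CHOICES in class_guidance_inference.py
    class_labels = torch.zeros(1, dtype=torch.long)
    # Dummy text conditioning: the conditional UNet still expects encoder_hidden_states
    encoder_hidden_states = torch.zeros(1, 1, pipe.unet.config.cross_attention_dim)
    image = pipe(
        batch_size=1,
        num_inference_steps=50,
        class_labels=class_labels,
        encoder_hidden_states=encoder_hidden_states,
    ).images[0]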
class_guidance_inference.py ADDED
@@ -0,0 +1,81 @@
+# -*- coding: UTF-8 -*-
+"""
+@Time : 30/05/2025 19:24
+@Author : xiaoguangliang
+@File : class_guidance_inference.py
+@Project : Faice_text2face
+"""
+import torch
+import random
+import numpy as np
+from ccddpm_pipeline import CCDDPMPipeline
+from accelerate import Accelerator
+import gradio as gr
+import spaces
+from loguru import logger
+
+from utils import timer
+
+model_path = 'Ngene787/Faice_class_guidance'
+
+if torch.backends.mps.is_available():
+    accelerator = Accelerator(gradient_accumulation_steps=1)
+else:
+    accelerator = Accelerator(mixed_precision="fp16", gradient_accumulation_steps=1)
+
+logger.info("Loading model ...")
+device = "cuda" if torch.cuda.is_available() else "cpu"
+if torch.cuda.is_available():
+    torch_dtype = torch.float16
+else:
+    torch_dtype = torch.float32
+pipe = CCDDPMPipeline.from_pretrained(model_path, torch_dtype=torch_dtype,
+                                      low_cpu_mem_usage=True
+                                      )
+pipe = pipe.to(device)
+
+pipe = accelerator.prepare(pipe)
+# Enable memory-efficient attention
+# pipe.enable_xformers_memory_efficient_attention()
+
+
+MAX_SEED = np.iinfo(np.int32).max
+
+GENDER_CHOICES = [
+    "Female",
+    "Male"
+]
+
+
+@spaces.GPU(duration=65)
+def inference_class_guidance(label_name,
+                             seed=0,
+                             randomize_seed=False,
+                             num_inference_steps=20,
+                             progress=gr.Progress(track_tqdm=True), ):
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+
+    generator = torch.Generator().manual_seed(seed)
+    label_id = 1 if label_name == "Male" else 0
+
+    logger.info('Generating image ...')
+    batch_size = 1
+    with timer("inference"):
+        class_labels = torch.full(
+            (batch_size,), label_id, dtype=torch.long, device=device
+        )
+        encoder_hidden_states = torch.zeros(
+            batch_size,
+            1,
+            pipe.unet.config.cross_attention_dim,
+            device=device,
+        )
+        image = pipe(
+            batch_size=batch_size,
+            generator=generator,
+            num_inference_steps=num_inference_steps,
+            class_labels=class_labels,
+            encoder_hidden_states=encoder_hidden_states,
+        ).images[0]
+    return image, seed
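
A hypothetical local smoke test for this module (not part of the commit), in the spirit of test/test_inference.py:

    from class_guidance_inference import inference_class_guidance

    # Returns a PIL image (the pipeline's default output type) and the seed actually used
    image, seed = inference_class_guidance("Female", seed=0, num_inference_steps=50)
    image.save("class_guidance_test.png")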
stable_diffusion_inference.py CHANGED
@@ -44,13 +44,13 @@ MAX_SEED = np.iinfo(np.int32).max
 
 
 @spaces.GPU(duration=65)
-def inference(prompt,
-              negative_prompt="",
-              seed=0,
-              randomize_seed=False,
-              guidance_scale=7.5,
-              num_inference_steps=20,
-              progress=gr.Progress(track_tqdm=True), ):
+def inference_sd(prompt,
+                 negative_prompt="",
+                 seed=0,
+                 randomize_seed=False,
+                 guidance_scale=7.5,
+                 num_inference_steps=20,
+                 progress=gr.Progress(track_tqdm=True), ):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
 
test/test_inference.py CHANGED
@@ -5,12 +5,12 @@
 @File : test_inference.py
 @Project : Faice_text2face
 """
-from stable_diffusion_inference import inference
+from stable_diffusion_inference import inference_sd
 from utils import timer
 
 prompt = "Portrait of a young woman with long wavy hair, soft studio lighting, high contrast, 4k resolution, professional headshot"
 # prompt = "Close-up of a smiling man with sharp jawline, cinematic lighting, shallow depth of field, bokeh background"
 
 with timer("Test inference"):
-    image, seed = inference(prompt)
+    image, seed = inference_sd(prompt)
     image.save("test.png")
unconditional_diffusion_inference.py ADDED
@@ -0,0 +1,68 @@
+# -*- coding: UTF-8 -*-
+"""
+@Time : 30/05/2025 19:24
+@Author : xiaoguangliang
+@File : unconditional_diffusion_inference.py
+@Project : Faice_text2face
+"""
+import torch
+import random
+import numpy as np
+from diffusers import DDPMPipeline
+from accelerate import Accelerator
+import gradio as gr
+import spaces
+import PIL.Image
+from loguru import logger
+
+from utils import timer
+
+model_path = 'Ngene787/Faice_unconditional_diffusion'
+
+if torch.backends.mps.is_available():
+    accelerator = Accelerator(gradient_accumulation_steps=1)
+else:
+    accelerator = Accelerator(mixed_precision="fp16", gradient_accumulation_steps=1)
+
+logger.info("Loading model ...")
+device = "cuda" if torch.cuda.is_available() else "cpu"
+if torch.cuda.is_available():
+    torch_dtype = torch.float16
+else:
+    torch_dtype = torch.float32
+pipe = DDPMPipeline.from_pretrained(model_path, torch_dtype=torch_dtype,
+                                    low_cpu_mem_usage=True
+                                    )
+pipe = pipe.to(device)
+
+pipe = accelerator.prepare(pipe)
+# Enable memory-efficient attention
+# pipe.enable_xformers_memory_efficient_attention()
+
+
+MAX_SEED = np.iinfo(np.int32).max
+
+
+@spaces.GPU(duration=65)
+def inference_unconditional(seed,
+                            randomize_seed=False,
+                            num_inference_steps=20,
+                            progress=gr.Progress(track_tqdm=True), ):
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+
+    generator = torch.Generator().manual_seed(seed)
+
+    logger.info('Generating image ...')
+    with timer("inference"):
+        image = pipe(
+            batch_size=1,
+            generator=generator,
+            num_inference_steps=num_inference_steps,
+            output_type="np",
+        ).images[0]
+    # image = torch.tensor(image, device=device)
+    # image = image.permute(0, 3, 1, 2)
+    # images_uint8 = (image * 255).astype(np.uint8)
+    # image = PIL.Image.fromarray(images_uint8)
+    return image, seed
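
Similarly, a hypothetical smoke test for the unconditional model (not part of the commit). Because the pipeline is called with output_type="np", inference_unconditional returns a float numpy array in [0, 1] rather than a PIL image, so it needs converting before saving:

    import numpy as np
    import PIL.Image
    from unconditional_diffusion_inference import inference_unconditional

    image, seed = inference_unconditional(seed=0, num_inference_steps=50)
    # image has shape (H, W, 3); scale to uint8 before constructing a PIL image
    PIL.Image.fromarray((image * 255).astype(np.uint8)).save("unconditional_test.png")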