Ngene787 committed
Commit ae53881 · 1 Parent(s): 6c4a7e0

feat: add unconditional diffusion model and class guidance model

app.py CHANGED
@@ -7,7 +7,9 @@
 """
 import gradio as gr
 
-from stable_diffusion_inference import MAX_SEED
+from unconditional_diffusion_inference import inference_unconditional
+from class_guidance_inference import inference_class_guidance, GENDER_CHOICES
+from stable_diffusion_inference import inference_sd, MAX_SEED
 from inference_api import inference
 from utils import timer
 
@@ -87,7 +89,73 @@ with gr.Blocks(theme=theme, css=css) as demo:
     """)
 
     gr.Markdown("---")
-    gr.Markdown("## Part 1. Text-to-Image Generation")
+    gr.Markdown("## Part 1. Unconditional Face Generation")
+    with gr.Row():
+        run_button_1 = gr.Button("Run", scale=0, variant="primary")
+
+    result_1 = gr.Image(label="Result", show_label=False)
+
+    with gr.Accordion("Advanced Settings", open=False):
+        seed_1 = gr.Slider(
+            label="Seed",
+            minimum=0,
+            maximum=MAX_SEED,
+            step=1,
+            value=0,
+        )
+
+        randomize_seed_1 = gr.Checkbox(label="Randomize seed", value=False)
+
+        with gr.Row():
+            num_inference_steps_1 = gr.Slider(
+                label="Number of inference steps",
+                minimum=1,
+                maximum=100,
+                step=1,
+                value=50,
+            )
+
+    # gr.Examples(examples=[], inputs=[seed_1], outputs=[result_1, seed_1], fn=inference_unconditional,
+    #             cache_examples=True, cache_mode="lazy")
+
+    gr.Markdown("---")
+    gr.Markdown("## Part 2. Class Guidance Face Generation")
+    with gr.Row():
+        gender_select_radio = gr.Radio(
+            label="Select Gender",
+            choices=GENDER_CHOICES,
+            value=GENDER_CHOICES[0]
+        )
+        run_button_2 = gr.Button("Run", scale=0, variant="primary")
+
+    result_2 = gr.Image(label="Result", show_label=False)
+
+    with gr.Accordion("Advanced Settings", open=False):
+        seed_2 = gr.Slider(
+            label="Seed",
+            minimum=0,
+            maximum=MAX_SEED,
+            step=1,
+            value=0,
+        )
+
+        randomize_seed_2 = gr.Checkbox(label="Randomize seed", value=False)
+
+        with gr.Row():
+            num_inference_steps_2 = gr.Slider(
+                label="Number of inference steps",
+                minimum=1,
+                maximum=100,
+                step=1,
+                value=50,
+            )
+
+    # gr.Examples(examples=[], inputs=[gender_select_radio], outputs=[result_2, seed_2],
+    #             fn=inference_class_guidance,
+    #             cache_examples=True, cache_mode="lazy")
+
+    gr.Markdown("---")
+    gr.Markdown("## Part 3. Text-to-Face Generation")
     with gr.Row():
         prompt = gr.Text(
             label="Prompt",
@@ -97,9 +165,9 @@ with gr.Blocks(theme=theme, css=css) as demo:
             container=False,
         )
 
-        run_button = gr.Button("Run", scale=0, variant="primary")
+        run_button_3 = gr.Button("Run", scale=0, variant="primary")
 
-    result = gr.Image(label="Result", show_label=False)
+    result_3 = gr.Image(label="Result", show_label=False)
 
     with gr.Accordion("Advanced Settings", open=False):
         negative_prompt = gr.Text(
@@ -108,7 +176,7 @@ with gr.Blocks(theme=theme, css=css) as demo:
             placeholder="Enter a negative prompt",
        )
 
-        seed = gr.Slider(
+        seed_3 = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=MAX_SEED,
@@ -116,7 +184,7 @@ with gr.Blocks(theme=theme, css=css) as demo:
            value=0,
        )
 
-        randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
+        randomize_seed_3 = gr.Checkbox(label="Randomize seed", value=False)
 
        # with gr.Row():
        #     width = gr.Slider(
@@ -144,7 +212,7 @@ with gr.Blocks(theme=theme, css=css) as demo:
            value=7.5,
        )
 
-        num_inference_steps = gr.Slider(
+        num_inference_steps_3 = gr.Slider(
            label="Number of inference steps",
            minimum=1,
            maximum=100,
@@ -152,20 +220,43 @@ with gr.Blocks(theme=theme, css=css) as demo:
            value=50,
        )
 
-    gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=inference,
+    gr.Examples(examples=examples, inputs=[prompt], outputs=[result_3, seed_3], fn=inference_sd,
                 cache_examples=True, cache_mode="lazy")
+
+    gr.on(
+        triggers=[run_button_1.click],
+        fn=inference_unconditional,
+        inputs=[
+            randomize_seed_1,
+            num_inference_steps_1,
+        ],
+        outputs=[result_1, seed_1],
+    )
+
+    gr.on(
+        triggers=[run_button_2.click],
+        fn=inference_class_guidance,
+        inputs=[
+            gender_select_radio,
+            seed_2,
+            randomize_seed_2,
+            num_inference_steps_2,
+        ],
+        outputs=[result_2, seed_2],
+    )
+
     gr.on(
-        triggers=[run_button.click, prompt.submit],
-        fn=inference,
+        triggers=[run_button_3.click, prompt.submit],
+        fn=inference_sd,
        inputs=[
            prompt,
            negative_prompt,
-            seed,
-            randomize_seed,
+            seed_3,
+            randomize_seed_3,
            guidance_scale,
-            num_inference_steps,
+            num_inference_steps_3,
        ],
-        outputs=[result, seed],
+        outputs=[result_3, seed_3],
    )
 
 if __name__ == "__main__":
ccddpm_pipeline.py ADDED
@@ -0,0 +1,81 @@
+from typing import List, Optional, Union, Tuple
+import torch
+from diffusers import DDPMPipeline, ImagePipelineOutput, UNet2DConditionModel
+from diffusers.utils.torch_utils import randn_tensor
+
+
+class CCDDPMPipeline(DDPMPipeline):
+    def __init__(self, unet, scheduler):
+        if not isinstance(unet, UNet2DConditionModel):
+            raise ValueError(
+                "CCDDPMPipeline requires a UNet2DConditionModel for class conditioning."
+            )
+        super().__init__(unet, scheduler)
+
+    # Overwrite the __call__ method to accept class labels and encoder hidden states.
+    @torch.no_grad()
+    def __call__(
+        self,
+        batch_size: int = 1,
+        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+        num_inference_steps: int = 1000,
+        output_type: Optional[str] = "pil",
+        return_dict: bool = True,
+        *,
+        class_labels: torch.LongTensor,
+        encoder_hidden_states: torch.FloatTensor,
+    ) -> Union[ImagePipelineOutput, Tuple]:
+        # Determine shape for initial noise
+        if isinstance(self.unet.config.sample_size, int):
+            image_shape = (
+                batch_size,
+                self.unet.config.in_channels,
+                self.unet.config.sample_size,
+                self.unet.config.sample_size,
+            )
+        else:
+            image_shape = (
+                batch_size,
+                self.unet.config.in_channels,
+                *self.unet.config.sample_size,
+            )
+
+        # Sample gaussian noise to begin loop
+        if self.device.type == "mps":
+            image = randn_tensor(
+                image_shape, generator=generator, dtype=self.unet.dtype
+            )
+            image = image.to(self.device)
+        else:
+            image = randn_tensor(
+                image_shape,
+                generator=generator,
+                device=self.device,
+                dtype=self.unet.dtype,
+            )
+
+        # set step values
+        self.scheduler.set_timesteps(num_inference_steps)
+
+        # Denoising loop
+        for t in self.progress_bar(self.scheduler.timesteps):
+            model_output = self.unet(
+                image,
+                t,
+                encoder_hidden_states=encoder_hidden_states,
+                class_labels=class_labels,
+            ).sample
+            image = self.scheduler.step(
+                model_output, t, image, generator=generator
+            ).prev_sample
+
+        # Post-process to image
+        image = (image / 2 + 0.5).clamp(0, 1)
+        image = image.cpu().permute(0, 2, 3, 1).numpy()
+        if output_type == "pil":
+            image = self.numpy_to_pil(image)
+
+        if not return_dict:
+            return (image,)
+
+        return ImagePipelineOutput(images=image)
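
For context, a minimal sketch of how this pipeline is meant to be driven, mirroring the call added in class_guidance_inference.py below; the checkpoint name, label mapping and step count are simply the values used elsewhere in this commit, not new API:

    import torch
    from ccddpm_pipeline import CCDDPMPipeline

    pipe = CCDDPMPipeline.from_pretrained("Ngene787/Faice_class_guidance")
    # 0 = Female, 1 = Male, matching GENDER_CHOICES in class_guidance_inference.py
    class_labels = torch.zeros(1, dtype=torch.long)
    # Dummy text conditioning: the conditional UNet still expects encoder_hidden_states
    encoder_hidden_states = torch.zeros(1, 1, pipe.unet.config.cross_attention_dim)
    image = pipe(
        batch_size=1,
        num_inference_steps=50,
        class_labels=class_labels,
        encoder_hidden_states=encoder_hidden_states,
    ).images[0]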
class_guidance_inference.py ADDED
@@ -0,0 +1,81 @@
+# -*- coding: UTF-8 -*-
+"""
+@Time : 30/05/2025 19:24
+@Author : xiaoguangliang
+@File : class_guidance_inference.py
+@Project : Faice_text2face
+"""
+import torch
+import random
+import numpy as np
+from ccddpm_pipeline import CCDDPMPipeline
+from accelerate import Accelerator
+import gradio as gr
+import spaces
+from loguru import logger
+
+from utils import timer
+
+model_path = 'Ngene787/Faice_class_guidance'
+
+if torch.backends.mps.is_available():
+    accelerator = Accelerator(gradient_accumulation_steps=1)
+else:
+    accelerator = Accelerator(mixed_precision="fp16", gradient_accumulation_steps=1)
+
+logger.info("Loading model ...")
+device = "cuda" if torch.cuda.is_available() else "cpu"
+if torch.cuda.is_available():
+    torch_dtype = torch.float16
+else:
+    torch_dtype = torch.float32
+pipe = CCDDPMPipeline.from_pretrained(model_path, torch_dtype=torch_dtype,
+                                      low_cpu_mem_usage=True
+                                      )
+pipe = pipe.to(device)
+
+pipe = accelerator.prepare(pipe)
+# Enable memory-efficient attention
+# pipe.enable_xformers_memory_efficient_attention()
+
+
+MAX_SEED = np.iinfo(np.int32).max
+
+GENDER_CHOICES = [
+    "Female",
+    "Male"
+]
+
+
+@spaces.GPU(duration=65)
+def inference_class_guidance(label_name,
+                             seed=0,
+                             randomize_seed=False,
+                             num_inference_steps=20,
+                             progress=gr.Progress(track_tqdm=True), ):
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+
+    generator = torch.Generator().manual_seed(seed)
+    label_id = 1 if label_name == "Male" else 0
+
+    logger.info('Generating image ...')
+    batch_size = 1
+    with timer("inference"):
+        class_labels = torch.full(
+            (batch_size,), label_id, dtype=torch.long, device=device
+        )
+        encoder_hidden_states = torch.zeros(
+            batch_size,
+            1,
+            pipe.unet.config.cross_attention_dim,
+            device=device,
+        )
+        image = pipe(
+            batch_size=batch_size,
+            generator=generator,
+            num_inference_steps=num_inference_steps,
+            class_labels=class_labels,
+            encoder_hidden_states=encoder_hidden_states,
+        ).images[0]
+    return image, seed
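
A hypothetical local smoke test for this module (not part of the commit), in the spirit of test/test_inference.py:

    from class_guidance_inference import inference_class_guidance

    # Returns a PIL image (the pipeline's default output type) and the seed actually used
    image, seed = inference_class_guidance("Female", seed=0, num_inference_steps=50)
    image.save("class_guidance_test.png")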
stable_diffusion_inference.py CHANGED
@@ -44,13 +44,13 @@ MAX_SEED = np.iinfo(np.int32).max
 
 
 @spaces.GPU(duration=65)
-def inference(prompt,
-              negative_prompt="",
-              seed=0,
-              randomize_seed=False,
-              guidance_scale=7.5,
-              num_inference_steps=20,
-              progress=gr.Progress(track_tqdm=True), ):
+def inference_sd(prompt,
+                 negative_prompt="",
+                 seed=0,
+                 randomize_seed=False,
+                 guidance_scale=7.5,
+                 num_inference_steps=20,
+                 progress=gr.Progress(track_tqdm=True), ):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
 
test/test_inference.py CHANGED
@@ -5,12 +5,12 @@
 @File : test_inference.py
 @Project : Faice_text2face
 """
-from stable_diffusion_inference import inference
+from stable_diffusion_inference import inference_sd
 from utils import timer
 
 prompt = "Portrait of a young woman with long wavy hair, soft studio lighting, high contrast, 4k resolution, professional headshot"
 # prompt = "Close-up of a smiling man with sharp jawline, cinematic lighting, shallow depth of field, bokeh background"
 
 with timer("Test inference"):
-    image, seed = inference(prompt)
+    image, seed = inference_sd(prompt)
     image.save("test.png")
unconditional_diffusion_inference.py ADDED
@@ -0,0 +1,68 @@
+# -*- coding: UTF-8 -*-
+"""
+@Time : 30/05/2025 19:24
+@Author : xiaoguangliang
+@File : unconditional_diffusion_inference.py
+@Project : Faice_text2face
+"""
+import torch
+import random
+import numpy as np
+from diffusers import DDPMPipeline
+from accelerate import Accelerator
+import gradio as gr
+import spaces
+import PIL.Image
+from loguru import logger
+
+from utils import timer
+
+model_path = 'Ngene787/Faice_unconditional_diffusion'
+
+if torch.backends.mps.is_available():
+    accelerator = Accelerator(gradient_accumulation_steps=1)
+else:
+    accelerator = Accelerator(mixed_precision="fp16", gradient_accumulation_steps=1)
+
+logger.info("Loading model ...")
+device = "cuda" if torch.cuda.is_available() else "cpu"
+if torch.cuda.is_available():
+    torch_dtype = torch.float16
+else:
+    torch_dtype = torch.float32
+pipe = DDPMPipeline.from_pretrained(model_path, torch_dtype=torch_dtype,
+                                    low_cpu_mem_usage=True
+                                    )
+pipe = pipe.to(device)
+
+pipe = accelerator.prepare(pipe)
+# Enable memory-efficient attention
+# pipe.enable_xformers_memory_efficient_attention()
+
+
+MAX_SEED = np.iinfo(np.int32).max
+
+
+@spaces.GPU(duration=65)
+def inference_unconditional(seed,
+                            randomize_seed=False,
+                            num_inference_steps=20,
+                            progress=gr.Progress(track_tqdm=True), ):
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+
+    generator = torch.Generator().manual_seed(seed)
+
+    logger.info('Generating image ...')
+    with timer("inference"):
+        image = pipe(
+            batch_size=1,
+            generator=generator,
+            num_inference_steps=num_inference_steps,
+            output_type="np",
+        ).images[0]
+    # image = torch.tensor(image, device=device)
+    # image = image.permute(0, 3, 1, 2)
+    # images_uint8 = (image * 255).astype(np.uint8)
+    # image = PIL.Image.fromarray(images_uint8)
+    return image, seed
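
Similarly, a hypothetical smoke test for the unconditional model (not part of the commit). Because the pipeline is called with output_type="np", inference_unconditional returns a float numpy array in [0, 1] rather than a PIL image, so it needs converting before saving:

    import numpy as np
    import PIL.Image
    from unconditional_diffusion_inference import inference_unconditional

    image, seed = inference_unconditional(seed=0, num_inference_steps=50)
    # image has shape (H, W, 3); scale to uint8 before constructing a PIL image
    PIL.Image.fromarray((image * 255).astype(np.uint8)).save("unconditional_test.png")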