[Admin maintenance] Support new ZeroGPU hardware

#5
by multimodalart HF Staff - opened
Files changed (4) hide show
  1. README.md +1 -1
  2. app.py +13 -10
  3. flash_flow_match_scheduler.py +280 -0
  4. requirements.txt +5 -9
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: ⚡
4
  colorFrom: indigo
5
  colorTo: red
6
  sdk: gradio
7
- sdk_version: 5.34.1
8
  python_version: 3.12
9
  app_file: app.py
10
  pinned: false
 
4
  colorFrom: indigo
5
  colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.49.1
8
  python_version: 3.12
9
  app_file: app.py
10
  pinned: false
app.py CHANGED
@@ -1,24 +1,27 @@
 
1
  import random
 
2
  import spaces
3
 
4
  import gradio as gr
5
  import numpy as np
6
  import torch
7
- from diffusers import StableDiffusion3Pipeline, SD3Transformer2DModel, FlashFlowMatchEulerDiscreteScheduler
8
  from peft import PeftModel
9
- import os
10
  from huggingface_hub import snapshot_download
11
 
12
- huggingface_token = os.getenv("HUGGINFACE_TOKEN")
 
 
13
 
14
  model_path = snapshot_download(
15
- repo_id="stabilityai/stable-diffusion-3-medium",
16
  revision="refs/pr/26",
17
- repo_type="model",
18
  ignore_patterns=["*.md", "*..gitattributes"],
19
  local_dir="stable-diffusion-3-medium",
20
- token=huggingface_token, # type a new token-id.
21
- )
22
 
23
  device = "cuda" if torch.cuda.is_available() else "cpu"
24
  IS_SPACE = os.environ.get("SPACE_ID", None) is not None
@@ -149,7 +152,7 @@ with gr.Blocks(css=css) as demo:
149
  placeholder="Enter a negative prompt",
150
  value="deformed, distorted, disfigured, poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation, NSFW, bad text"
151
  )
152
-
153
  seed = gr.Slider(
154
  label="Seed",
155
  minimum=0,
@@ -161,7 +164,7 @@ with gr.Blocks(css=css) as demo:
161
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
162
 
163
  with gr.Row():
164
-
165
  guidance_scale = gr.Slider(
166
  label="Guidance scale",
167
  minimum=0.0,
@@ -169,7 +172,7 @@ with gr.Blocks(css=css) as demo:
169
  step=0.1,
170
  value=1.0,
171
  )
172
-
173
  num_inference_steps = gr.Slider(
174
  label="Number of inference steps",
175
  minimum=4,
 
1
+ import os
2
  import random
3
+
4
  import spaces
5
 
6
  import gradio as gr
7
  import numpy as np
8
  import torch
9
+ from diffusers import StableDiffusion3Pipeline, SD3Transformer2DModel
10
  from peft import PeftModel
 
11
  from huggingface_hub import snapshot_download
12
 
13
+ from flash_flow_match_scheduler import FlashFlowMatchEulerDiscreteScheduler
14
+
15
# Hub access token. `HF_TOKEN` is checked first; the misspelled
# `HUGGINFACE_TOKEN` variable is still honoured as a fallback.
huggingface_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINFACE_TOKEN")

# Download the base model snapshot into a local folder and remember its path.
model_path = snapshot_download(
    repo_id="stabilityai/stable-diffusion-3-medium",
    revision="refs/pr/26",
    repo_type="model",
    # Skip markdown files and git metadata. The previous pattern
    # "*..gitattributes" had a doubled dot and could never match
    # ".gitattributes".
    ignore_patterns=["*.md", "*.gitattributes"],
    local_dir="stable-diffusion-3-medium",
    token=huggingface_token,
)

device = "cuda" if torch.cuda.is_available() else "cpu"
# True whenever the SPACE_ID environment variable is set.
IS_SPACE = os.environ.get("SPACE_ID", None) is not None
 
152
  placeholder="Enter a negative prompt",
153
  value="deformed, distorted, disfigured, poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation, NSFW, bad text"
154
  )
155
+
156
  seed = gr.Slider(
157
  label="Seed",
158
  minimum=0,
 
164
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
165
 
166
  with gr.Row():
167
+
168
  guidance_scale = gr.Slider(
169
  label="Guidance scale",
170
  minimum=0.0,
 
172
  step=0.1,
173
  value=1.0,
174
  )
175
+
176
  num_inference_steps = gr.Slider(
177
  label="Number of inference steps",
178
  minimum=4,
flash_flow_match_scheduler.py ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Stability AI, Katherine Crowson and The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from dataclasses import dataclass
16
+ from typing import Optional, Tuple, Union
17
+
18
+ import numpy as np
19
+ import torch
20
+
21
+ from diffusers.configuration_utils import ConfigMixin, register_to_config
22
+ from diffusers.utils import BaseOutput, logging
23
+ from diffusers.utils.torch_utils import randn_tensor
24
+ from diffusers.schedulers.scheduling_utils import SchedulerMixin
25
+
26
+ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
27
+
28
+
29
@dataclass
class FlashFlowMatchEulerDiscreteSchedulerOutput(BaseOutput):
    """
    Output class for the scheduler's `step` function output.

    Args:
        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
            denoising loop.
    """

    # The only field this output carries; no `pred_original_sample` is produced by the scheduler.
    prev_sample: torch.FloatTensor
44
+
45
+
46
class FlashFlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
    """
    Euler-style scheduler for flow-matching models that re-noises between steps.

    On every step the model output is used to form an estimate `sample - model_output * sigma`. On all but the last
    inference step that estimate is blended with freshly drawn Gaussian noise at the next sigma level; on the last
    step the estimate is returned as is.

    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
    methods the library implements for all schedulers such as loading and saving.

    Args:
        num_train_timesteps (`int`, defaults to 1000):
            The number of diffusion steps to train the model.
        shift (`float`, defaults to 1.0):
            The shift value for the timestep schedule.
    """

    _compatibles = []
    order = 1

    @register_to_config
    def __init__(
        self,
        num_train_timesteps: int = 1000,
        shift: float = 1.0,
    ):
        # Descending training timesteps: num_train_timesteps, ..., 1.
        timesteps = np.linspace(
            1, num_train_timesteps, num_train_timesteps, dtype=np.float32
        )[::-1].copy()
        timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32)

        # Normalise to (0, 1] and apply the shift to the schedule.
        sigmas = timesteps / num_train_timesteps
        sigmas = shift * sigmas / (1 + (shift - 1) * sigmas)

        self.timesteps = sigmas * num_train_timesteps

        self._step_index = None
        self._begin_index = None
        # Populated by `set_timesteps`; `step` refuses to run while this is still None.
        self.num_inference_steps = None

        self.sigmas = sigmas.to("cpu")  # to avoid too much CPU/GPU communication
        self.sigma_min = self.sigmas[-1].item()
        self.sigma_max = self.sigmas[0].item()

    @property
    def step_index(self):
        """
        The index counter for current timestep. It will increase 1 after each scheduler step.
        """
        return self._step_index

    @property
    def begin_index(self):
        """
        The index for the first timestep. It should be set from pipeline with `set_begin_index` method.
        """
        return self._begin_index

    # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index
    def set_begin_index(self, begin_index: int = 0):
        """
        Sets the begin index for the scheduler. This function should be run from pipeline before the inference.

        Args:
            begin_index (`int`):
                The begin index for the scheduler.
        """
        self._begin_index = begin_index

    def scale_noise(
        self,
        sample: torch.FloatTensor,
        timestep: Union[float, torch.FloatTensor],
        noise: Optional[torch.FloatTensor] = None,
    ) -> torch.FloatTensor:
        """
        Blend `sample` with `noise` at the sigma level of the current step:
        `sigma * noise + (1 - sigma) * sample`.

        Args:
            sample (`torch.FloatTensor`):
                The input sample.
            timestep (`float` or `torch.FloatTensor`):
                The current timestep in the diffusion chain. Only used to initialise the step index when it has not
                been set yet.
            noise (`torch.FloatTensor`, *optional*):
                The noise to mix in. When omitted, standard Gaussian noise with the shape, device and dtype of
                `sample` is drawn from the global random state.

        Returns:
            `torch.FloatTensor`:
                The noised sample.
        """
        if self.step_index is None:
            self._init_step_index(timestep)

        if noise is None:
            # The signature advertises `noise` as optional, so draw it rather than fail on `sigma * None`.
            noise = randn_tensor(sample.shape, device=sample.device, dtype=sample.dtype)

        sigma = self.sigmas[self.step_index]
        sample = sigma * noise + (1.0 - sigma) * sample

        return sample

    def _sigma_to_t(self, sigma):
        # Sigmas live in [0, 1]; timesteps are the same values scaled by the training horizon.
        return sigma * self.config.num_train_timesteps

    def set_timesteps(
        self,
        num_inference_steps: int,
        device: Optional[Union[str, torch.device]] = None,
    ):
        """
        Sets the discrete timesteps used for the diffusion chain (to be run before inference).

        Args:
            num_inference_steps (`int`):
                The number of diffusion steps used when generating samples with a pre-trained model.
            device (`str` or `torch.device`, *optional*):
                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
        """
        self.num_inference_steps = num_inference_steps

        # Evenly spaced timesteps from the largest to the smallest training sigma.
        timesteps = np.linspace(
            self._sigma_to_t(self.sigma_max),
            self._sigma_to_t(self.sigma_min),
            num_inference_steps,
        )

        sigmas = timesteps / self.config.num_train_timesteps
        sigmas = self.config.shift * sigmas / (1 + (self.config.shift - 1) * sigmas)
        sigmas = torch.from_numpy(sigmas).to(dtype=torch.float32, device=device)

        timesteps = sigmas * self.config.num_train_timesteps
        self.timesteps = timesteps.to(device=device)
        # A trailing zero sigma is appended so `sigmas` has one more entry than `timesteps`.
        self.sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)])

        # A new schedule invalidates any position tracked from a previous run.
        self._step_index = None
        self._begin_index = None

    def index_for_timestep(self, timestep, schedule_timesteps=None):
        """
        Return the position of `timestep` inside `schedule_timesteps` (defaults to `self.timesteps`).
        """
        if schedule_timesteps is None:
            schedule_timesteps = self.timesteps

        indices = (schedule_timesteps == timestep).nonzero()

        # The sigma index that is taken for the **very** first `step`
        # is always the second index (or the last index if there is only 1)
        # This way we can ensure we don't accidentally skip a sigma in
        # case we start in the middle of the denoising schedule (e.g. for image-to-image)
        pos = 1 if len(indices) > 1 else 0

        return indices[pos].item()

    def _init_step_index(self, timestep):
        # Without an explicit begin index, locate the timestep in the schedule;
        # otherwise start from the index the pipeline provided.
        if self.begin_index is None:
            if isinstance(timestep, torch.Tensor):
                timestep = timestep.to(self.timesteps.device)
            self._step_index = self.index_for_timestep(timestep)
        else:
            self._step_index = self._begin_index

    def step(
        self,
        model_output: torch.FloatTensor,
        timestep: Union[float, torch.FloatTensor],
        sample: torch.FloatTensor,
        s_churn: float = 0.0,
        s_tmin: float = 0.0,
        s_tmax: float = float("inf"),
        s_noise: float = 1.0,
        generator: Optional[torch.Generator] = None,
        return_dict: bool = True,
    ) -> Union[FlashFlowMatchEulerDiscreteSchedulerOutput, Tuple]:
        """
        Advance the sample by one scheduler step.

        The estimate `sample - model_output * sigma` is computed first. Unless this is the last inference step, the
        estimate is then blended with fresh Gaussian noise at the next sigma level.

        Args:
            model_output (`torch.FloatTensor`):
                The direct output from learned diffusion model.
            timestep (`float` or `torch.FloatTensor`):
                The current discrete timestep in the diffusion chain. Must be one of `scheduler.timesteps`.
            sample (`torch.FloatTensor`):
                A current instance of a sample created by the diffusion process.
            s_churn (`float`):
                Not used by this scheduler; presumably kept for signature compatibility.
            s_tmin (`float`):
                Not used by this scheduler; presumably kept for signature compatibility.
            s_tmax (`float`):
                Not used by this scheduler; presumably kept for signature compatibility.
            s_noise (`float`, defaults to 1.0):
                Not used by this scheduler; presumably kept for signature compatibility.
            generator (`torch.Generator`, *optional*):
                A random number generator used for the noise drawn between steps.
            return_dict (`bool`):
                Whether or not to return a [`FlashFlowMatchEulerDiscreteSchedulerOutput`] or tuple.

        Returns:
            [`FlashFlowMatchEulerDiscreteSchedulerOutput`] or `tuple`:
                If return_dict is `True`, [`FlashFlowMatchEulerDiscreteSchedulerOutput`] is returned, otherwise a
                tuple is returned where the first element is the sample tensor.

        Raises:
            ValueError: If `timestep` is an integer index, or if `set_timesteps` has not been called yet.
        """

        if isinstance(timestep, (int, torch.IntTensor, torch.LongTensor)):
            raise ValueError(
                (
                    "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to"
                    " `FlashFlowMatchEulerDiscreteScheduler.step()` is not supported. Make sure to pass"
                    " one of the `scheduler.timesteps` as a timestep."
                ),
            )

        if self.num_inference_steps is None:
            raise ValueError(
                "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
            )

        if self.step_index is None:
            self._init_step_index(timestep)

        sigma = self.sigmas[self.step_index]

        sample = sample - model_output * sigma

        # Every step except the final one re-noises the estimate to the next sigma level.
        if self.step_index < self.num_inference_steps - 1:
            sigma_next = self.sigmas[self.step_index + 1]
            noise = randn_tensor(
                model_output.shape,
                generator=generator,
                device=model_output.device,
                dtype=sample.dtype,
            )
            sample = sigma_next * noise + (1.0 - sigma_next) * sample

        # upon completion increase step index by one
        self._step_index += 1

        if not return_dict:
            return (sample,)

        return FlashFlowMatchEulerDiscreteSchedulerOutput(prev_sample=sample)

    def __len__(self):
        return self.config.num_train_timesteps
requirements.txt CHANGED
@@ -1,17 +1,13 @@
1
  accelerate>=1.8.0
2
  beautifulsoup4
3
- diffusers @ git+https://github.com/initml/diffusers.git@clement/feature/flash_sd3
4
  ftfy
5
- gradio==5.34.1
6
- numpy==1.26.4
7
  invisible_watermark
8
  optimum
9
- peft >= 0.6.0
10
  sentencepiece==0.2.0
11
  spaces
12
- --extra-index-url https://download.pytorch.org/whl/cu121
13
- torch==2.5.1
14
- torchaudio>=2.1.0
15
- torchvision>=0.16.0
16
  transformers>=4.34.0
17
- xformers>=0.0.22.post7
 
1
  accelerate>=1.8.0
2
  beautifulsoup4
3
+ diffusers>=0.30
4
  ftfy
5
+ gradio==5.49.1
6
+ numpy<2
7
  invisible_watermark
8
  optimum
9
+ peft>=0.6.0
10
  sentencepiece==0.2.0
11
  spaces
12
+ torchvision
 
 
 
13
  transformers>=4.34.0