Spaces:

jasperai
/

flash-sd3

Sleeping

App Files Files Community

[Admin maintenance] Support new ZeroGPU hardware

by multimodalart HF Staff - opened May 26

base: refs/heads/main

←

from: refs/pr/5

Discussion Files changed

+299

-20

Files changed (4) hide show

README.md +1 -1
app.py +13 -10
flash_flow_match_scheduler.py +280 -0
requirements.txt +5 -9

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: ⚡
 colorFrom: indigo
 colorTo: red
 sdk: gradio
-sdk_version: 5.34.1
 python_version: 3.12
 app_file: app.py
 pinned: false

 colorFrom: indigo
 colorTo: red
 sdk: gradio
+sdk_version: 5.49.1
 python_version: 3.12
 app_file: app.py
 pinned: false

app.py CHANGED Viewed

@@ -1,24 +1,27 @@
 import random
 import spaces
 import gradio as gr
 import numpy as np
 import torch
-from diffusers import StableDiffusion3Pipeline, SD3Transformer2DModel, FlashFlowMatchEulerDiscreteScheduler
 from peft import PeftModel
-import os
 from huggingface_hub import snapshot_download
-huggingface_token = os.getenv("HUGGINFACE_TOKEN")
 model_path = snapshot_download(
-    repo_id="stabilityai/stable-diffusion-3-medium",
     revision="refs/pr/26",
-    repo_type="model",
     ignore_patterns=["*.md", "*..gitattributes"],
     local_dir="stable-diffusion-3-medium",
-    token=huggingface_token, # type a new token-id.
-    )
 device = "cuda" if torch.cuda.is_available() else "cpu"
 IS_SPACE = os.environ.get("SPACE_ID", None) is not None
@@ -149,7 +152,7 @@ with gr.Blocks(css=css) as demo:
                 placeholder="Enter a negative prompt",
                 value="deformed, distorted, disfigured, poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation, NSFW, bad text"
             )
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
@@ -161,7 +164,7 @@ with gr.Blocks(css=css) as demo:
             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
             with gr.Row():
                 guidance_scale = gr.Slider(
                     label="Guidance scale",
                     minimum=0.0,
@@ -169,7 +172,7 @@ with gr.Blocks(css=css) as demo:
                     step=0.1,
                     value=1.0,
                 )
                 num_inference_steps = gr.Slider(
                     label="Number of inference steps",
                     minimum=4,

+import os
 import random
 import spaces
 import gradio as gr
 import numpy as np
 import torch
+from diffusers import StableDiffusion3Pipeline, SD3Transformer2DModel
 from peft import PeftModel
 from huggingface_hub import snapshot_download
+from flash_flow_match_scheduler import FlashFlowMatchEulerDiscreteScheduler
+huggingface_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINFACE_TOKEN")
 model_path = snapshot_download(
+    repo_id="stabilityai/stable-diffusion-3-medium",
     revision="refs/pr/26",
+    repo_type="model",
     ignore_patterns=["*.md", "*..gitattributes"],
     local_dir="stable-diffusion-3-medium",
+    token=huggingface_token,
+)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 IS_SPACE = os.environ.get("SPACE_ID", None) is not None
                 placeholder="Enter a negative prompt",
                 value="deformed, distorted, disfigured, poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation, NSFW, bad text"
             )
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
             with gr.Row():
                 guidance_scale = gr.Slider(
                     label="Guidance scale",
                     minimum=0.0,
                     step=0.1,
                     value=1.0,
                 )
                 num_inference_steps = gr.Slider(
                     label="Number of inference steps",
                     minimum=4,

flash_flow_match_scheduler.py ADDED Viewed

	@@ -0,0 +1,280 @@

+# Copyright 2024 Stability AI, Katherine Crowson and The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from dataclasses import dataclass
+from typing import Optional, Tuple, Union
+import numpy as np
+import torch
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.utils import BaseOutput, logging
+from diffusers.utils.torch_utils import randn_tensor
+from diffusers.schedulers.scheduling_utils import SchedulerMixin
+logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+@dataclass
+class FlashFlowMatchEulerDiscreteSchedulerOutput(BaseOutput):
+    """
+    Output class for the scheduler's `step` function output.
+    Args:
+        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+            Sample returned by `step`. `prev_sample` should be used as next model input in the
+            denoising loop.
+    """
+    # The only field carried by this output; `step` fills it with the updated sample.
+    prev_sample: torch.FloatTensor
+class FlashFlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
+    """
+    Euler scheduler.
+    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+    methods the library implements for all schedulers such as loading and saving.
+    Args:
+        num_train_timesteps (`int`, defaults to 1000):
+            The number of diffusion steps to train the model.
+        timestep_spacing (`str`, defaults to `"linspace"`):
+            The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
+            Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
+        shift (`float`, defaults to 1.0):
+            The shift value for the timestep schedule.
+    """
+    _compatibles = []
+    order = 1
+    @register_to_config
+    def __init__(
+        self,
+        num_train_timesteps: int = 1000,
+        shift: float = 1.0,
+    ):
+        timesteps = np.linspace(
+            1, num_train_timesteps, num_train_timesteps, dtype=np.float32
+        )[::-1].copy()
+        timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32)
+        sigmas = timesteps / num_train_timesteps
+        sigmas = shift * sigmas / (1 + (shift - 1) * sigmas)
+        self.timesteps = sigmas * num_train_timesteps
+        self._step_index = None
+        self._begin_index = None
+        self.sigmas = sigmas.to("cpu")  # to avoid too much CPU/GPU communication
+        self.sigma_min = self.sigmas[-1].item()
+        self.sigma_max = self.sigmas[0].item()
+    @property
+    def step_index(self):
+        """
+        The index counter for current timestep. It will increase 1 after each scheduler step.
+        """
+        return self._step_index
+    @property
+    def begin_index(self):
+        """
+        The index for the first timestep. It should be set from pipeline with `set_begin_index` method.
+        """
+        return self._begin_index
+    # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index
+    def set_begin_index(self, begin_index: int = 0):
+        """
+        Sets the begin index for the scheduler. This function should be run from pipeline before the inference.
+        Args:
+            begin_index (`int`):
+                The begin index for the scheduler.
+        """
+        self._begin_index = begin_index
+    def scale_noise(
+        self,
+        sample: torch.FloatTensor,
+        timestep: Union[float, torch.FloatTensor],
+        noise: Optional[torch.FloatTensor] = None,
+    ) -> torch.FloatTensor:
+        """
+        Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
+        current timestep. Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the Euler algorithm.
+        Args:
+            sample (`torch.FloatTensor`):
+                The input sample.
+            timestep (`int`, *optional*):
+                The current timestep in the diffusion chain.
+        Returns:
+            `torch.FloatTensor`:
+                A scaled input sample.
+        """
+        if self.step_index is None:
+            self._init_step_index(timestep)
+        sigma = self.sigmas[self.step_index]
+        sample = sigma * noise + (1.0 - sigma) * sample
+        return sample
+    def _sigma_to_t(self, sigma):
+        return sigma * self.config.num_train_timesteps
+    def set_timesteps(
+        self, num_inference_steps: int, device: Union[str, torch.device] = None
+    ):
+        """
+        Sets the discrete timesteps used for the diffusion chain (to be run before inference).
+        Args:
+            num_inference_steps (`int`):
+                The number of diffusion steps used when generating samples with a pre-trained model.
+            device (`str` or `torch.device`, *optional*):
+                The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
+        """
+        self.num_inference_steps = num_inference_steps
+        timesteps = np.linspace(
+            self._sigma_to_t(self.sigma_max),
+            self._sigma_to_t(self.sigma_min),
+            num_inference_steps,
+        )
+        sigmas = timesteps / self.config.num_train_timesteps
+        sigmas = self.config.shift * sigmas / (1 + (self.config.shift - 1) * sigmas)
+        sigmas = torch.from_numpy(sigmas).to(dtype=torch.float32, device=device)
+        timesteps = sigmas * self.config.num_train_timesteps
+        self.timesteps = timesteps.to(device=device)
+        self.sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)])
+        self._step_index = None
+        self._begin_index = None
+    def index_for_timestep(self, timestep, schedule_timesteps=None):
+        if schedule_timesteps is None:
+            schedule_timesteps = self.timesteps
+        indices = (schedule_timesteps == timestep).nonzero()
+        # The sigma index that is taken for the **very** first `step`
+        # is always the second index (or the last index if there is only 1)
+        # This way we can ensure we don't accidentally skip a sigma in
+        # case we start in the middle of the denoising schedule (e.g. for image-to-image)
+        pos = 1 if len(indices) > 1 else 0
+        return indices[pos].item()
+    def _init_step_index(self, timestep):
+        if self.begin_index is None:
+            if isinstance(timestep, torch.Tensor):
+                timestep = timestep.to(self.timesteps.device)
+            self._step_index = self.index_for_timestep(timestep)
+        else:
+            self._step_index = self._begin_index
+    def step(
+        self,
+        model_output: torch.FloatTensor,
+        timestep: Union[float, torch.FloatTensor],
+        sample: torch.FloatTensor,
+        s_churn: float = 0.0,
+        s_tmin: float = 0.0,
+        s_tmax: float = float("inf"),
+        s_noise: float = 1.0,
+        generator: Optional[torch.Generator] = None,
+        return_dict: bool = True,
+    ) -> Union[FlashFlowMatchEulerDiscreteSchedulerOutput, Tuple]:
+        """
+        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
+        process from the learned model outputs (most often the predicted noise).
+        Args:
+            model_output (`torch.FloatTensor`):
+                The direct output from learned diffusion model.
+            timestep (`float`):
+                The current discrete timestep in the diffusion chain.
+            sample (`torch.FloatTensor`):
+                A current instance of a sample created by the diffusion process.
+            s_churn (`float`):
+            s_tmin  (`float`):
+            s_tmax  (`float`):
+            s_noise (`float`, defaults to 1.0):
+                Scaling factor for noise added to the sample.
+            generator (`torch.Generator`, *optional*):
+                A random number generator.
+            return_dict (`bool`):
+                Whether or not to return a [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or
+                tuple.
+        Returns:
+            [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] is
+                returned, otherwise a tuple is returned where the first element is the sample tensor.
+        """
+        if (
+            isinstance(timestep, int)
+            or isinstance(timestep, torch.IntTensor)
+            or isinstance(timestep, torch.LongTensor)
+        ):
+            raise ValueError(
+                (
+                    "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to"
+                    " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass"
+                    " one of the `scheduler.timesteps` as a timestep."
+                ),
+            )
+        if self.step_index is None:
+            self._init_step_index(timestep)
+        sigma = self.sigmas[self.step_index]
+        # Upcast to avoid precision issues when computing prev_sample
+        # sample = sample.to(torch.float32
+        sample = sample - model_output * sigma
+        if self.step_index < self.num_inference_steps - 1:
+            sigma_next = self.sigmas[self.step_index + 1]
+            noise = randn_tensor(
+                model_output.shape,
+                generator=generator,
+                device=model_output.device,
+                dtype=sample.dtype,
+            )
+            sample = sigma_next * noise + (1.0 - sigma_next) * sample
+        # upon completion increase step index by one
+        self._step_index += 1
+        if not return_dict:
+            return (sample,)
+        return FlashFlowMatchEulerDiscreteSchedulerOutput(prev_sample=sample)
+    def __len__(self):
+        return self.config.num_train_timesteps

requirements.txt CHANGED Viewed

@@ -1,17 +1,13 @@
 accelerate>=1.8.0
 beautifulsoup4
-diffusers @ git+https://github.com/initml/diffusers.git@clement/feature/flash_sd3
 ftfy
-gradio==5.34.1
-numpy==1.26.4
 invisible_watermark
 optimum
-peft >= 0.6.0
 sentencepiece==0.2.0
 spaces
---extra-index-url https://download.pytorch.org/whl/cu121
-torch==2.5.1
-torchaudio>=2.1.0
-torchvision>=0.16.0
 transformers>=4.34.0
-xformers>=0.0.22.post7

 accelerate>=1.8.0
 beautifulsoup4
+diffusers>=0.30
 ftfy
+gradio==5.49.1
+numpy<2
 invisible_watermark
 optimum
+peft>=0.6.0
 sentencepiece==0.2.0
 spaces
+torchvision
 transformers>=4.34.0