Fix generator determinism: forward generator through scheduler steps and seeded noise

Browse files

Files changed (4) hide show

DiT-MoE-B-8E2A/pipeline.py +21 -27
DiT-MoE-S-8E2A/pipeline.py +21 -27
DiT-MoE-XL-8E2A/pipeline.py +21 -32
README.md +0 -3

DiT-MoE-B-8E2A/pipeline.py CHANGED Viewed

@@ -1,23 +1,15 @@
-# Copyright 2026 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 import importlib
 import inspect
 import json
 import sys
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Union
 import torch
@@ -72,6 +64,20 @@ class DiTMoEPipeline(DiffusionPipeline):
     Each checkpoint keeps an English `id2label` map in `model_index.json` (DiT-style).
     """
     model_cpu_offload_seq = "transformer->vae"
     _optional_components = ["vae"]
@@ -286,19 +292,6 @@ class DiTMoEPipeline(DiffusionPipeline):
             dtype=dtype,
         )
-    @staticmethod
-    def prepare_extra_step_kwargs(
-        scheduler: KarrasDiffusionSchedulers,
-        generator: Optional[Union[torch.Generator, List[torch.Generator]]],
-        eta: float,
-    ) -> Dict[str, object]:
-        kwargs: Dict[str, object] = {}
-        step_params = set(inspect.signature(scheduler.step).parameters.keys())
-        if "eta" in step_params:
-            kwargs["eta"] = eta
-        if "generator" in step_params:
-            kwargs["generator"] = generator
-        return kwargs
     def _apply_cfg(self, model_output: torch.Tensor, guidance_scale: float) -> torch.Tensor:
         if guidance_scale <= 1.0:
@@ -402,6 +395,7 @@ class DiTMoEPipeline(DiffusionPipeline):
                     timestep_batch[:batch_size] if do_cfg else timestep_batch,
                     latents_cfg,
                     next_timestep=next_timestep,
                 ).prev_sample
                 latents = step_output if not do_cfg else torch.cat([step_output, step_output], dim=0)
             latents = latents[:batch_size]

+"""Hub custom pipeline: DiTMoEPipeline.
+Load with native Hugging Face diffusers and trust_remote_code=True.
+"""
+from __future__ import annotations
 import importlib
 import inspect
 import json
 import sys
 from pathlib import Path
+from typing import Dict, List, Optional, Tuple, Union, Any
 import torch
     Each checkpoint keeps an English `id2label` map in `model_index.json` (DiT-style).
     """
+    @staticmethod
+    def prepare_extra_step_kwargs(
+        scheduler,
+        generator=None,
+        eta: float | None = None,
+    ):
+        kwargs = {}
+        step_params = set(inspect.signature(scheduler.step).parameters.keys())
+        if "generator" in step_params:
+            kwargs["generator"] = generator
+        if eta is not None and "eta" in step_params:
+            kwargs["eta"] = eta
+        return kwargs
     model_cpu_offload_seq = "transformer->vae"
     _optional_components = ["vae"]
             dtype=dtype,
         )
     def _apply_cfg(self, model_output: torch.Tensor, guidance_scale: float) -> torch.Tensor:
         if guidance_scale <= 1.0:
                     timestep_batch[:batch_size] if do_cfg else timestep_batch,
                     latents_cfg,
                     next_timestep=next_timestep,
+                    **extra_step_kwargs,
                 ).prev_sample
                 latents = step_output if not do_cfg else torch.cat([step_output, step_output], dim=0)
             latents = latents[:batch_size]

DiT-MoE-S-8E2A/pipeline.py CHANGED Viewed

@@ -1,23 +1,15 @@
-# Copyright 2026 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 import importlib
 import inspect
 import json
 import sys
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Union
 import torch
@@ -72,6 +64,20 @@ class DiTMoEPipeline(DiffusionPipeline):
     Each checkpoint keeps an English `id2label` map in `model_index.json` (DiT-style).
     """
     model_cpu_offload_seq = "transformer->vae"
     _optional_components = ["vae"]
@@ -286,19 +292,6 @@ class DiTMoEPipeline(DiffusionPipeline):
             dtype=dtype,
         )
-    @staticmethod
-    def prepare_extra_step_kwargs(
-        scheduler: KarrasDiffusionSchedulers,
-        generator: Optional[Union[torch.Generator, List[torch.Generator]]],
-        eta: float,
-    ) -> Dict[str, object]:
-        kwargs: Dict[str, object] = {}
-        step_params = set(inspect.signature(scheduler.step).parameters.keys())
-        if "eta" in step_params:
-            kwargs["eta"] = eta
-        if "generator" in step_params:
-            kwargs["generator"] = generator
-        return kwargs
     def _apply_cfg(self, model_output: torch.Tensor, guidance_scale: float) -> torch.Tensor:
         if guidance_scale <= 1.0:
@@ -402,6 +395,7 @@ class DiTMoEPipeline(DiffusionPipeline):
                     timestep_batch[:batch_size] if do_cfg else timestep_batch,
                     latents_cfg,
                     next_timestep=next_timestep,
                 ).prev_sample
                 latents = step_output if not do_cfg else torch.cat([step_output, step_output], dim=0)
             latents = latents[:batch_size]

+"""Hub custom pipeline: DiTMoEPipeline.
+Load with native Hugging Face diffusers and trust_remote_code=True.
+"""
+from __future__ import annotations
 import importlib
 import inspect
 import json
 import sys
 from pathlib import Path
+from typing import Dict, List, Optional, Tuple, Union, Any
 import torch
     Each checkpoint keeps an English `id2label` map in `model_index.json` (DiT-style).
     """
+    @staticmethod
+    def prepare_extra_step_kwargs(
+        scheduler,
+        generator=None,
+        eta: float | None = None,
+    ):
+        kwargs = {}
+        step_params = set(inspect.signature(scheduler.step).parameters.keys())
+        if "generator" in step_params:
+            kwargs["generator"] = generator
+        if eta is not None and "eta" in step_params:
+            kwargs["eta"] = eta
+        return kwargs
     model_cpu_offload_seq = "transformer->vae"
     _optional_components = ["vae"]
             dtype=dtype,
         )
     def _apply_cfg(self, model_output: torch.Tensor, guidance_scale: float) -> torch.Tensor:
         if guidance_scale <= 1.0:
                     timestep_batch[:batch_size] if do_cfg else timestep_batch,
                     latents_cfg,
                     next_timestep=next_timestep,
+                    **extra_step_kwargs,
                 ).prev_sample
                 latents = step_output if not do_cfg else torch.cat([step_output, step_output], dim=0)
             latents = latents[:batch_size]

DiT-MoE-XL-8E2A/pipeline.py CHANGED Viewed

@@ -1,23 +1,15 @@
-# Copyright 2026 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 import importlib
 import inspect
 import json
 import sys
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Union
 import torch
@@ -72,6 +64,20 @@ class DiTMoEPipeline(DiffusionPipeline):
     Each checkpoint keeps an English `id2label` map in `model_index.json` (DiT-style).
     """
     model_cpu_offload_seq = "transformer->vae"
     _optional_components = ["vae"]
@@ -133,12 +139,7 @@ class DiTMoEPipeline(DiffusionPipeline):
         id2label_override = kwargs.pop("id2label", None)
         null_class_id_override = kwargs.pop("null_class_id", None)
-        use_flash_attn_override = kwargs.pop("use_flash_attn", None)
         model_kwargs = dict(kwargs)
-        if use_flash_attn_override is not None:
-            model_kwargs["use_flash_attn"] = use_flash_attn_override
-        elif torch.cuda.is_available() and torch.cuda.get_device_capability()[0] < 8:
-            model_kwargs["use_flash_attn"] = False
         inserted: List[str] = []
         def _load_component(folder: str, module_name: str, class_name: str):
@@ -291,19 +292,6 @@ class DiTMoEPipeline(DiffusionPipeline):
             dtype=dtype,
         )
-    @staticmethod
-    def prepare_extra_step_kwargs(
-        scheduler: KarrasDiffusionSchedulers,
-        generator: Optional[Union[torch.Generator, List[torch.Generator]]],
-        eta: float,
-    ) -> Dict[str, object]:
-        kwargs: Dict[str, object] = {}
-        step_params = set(inspect.signature(scheduler.step).parameters.keys())
-        if "eta" in step_params:
-            kwargs["eta"] = eta
-        if "generator" in step_params:
-            kwargs["generator"] = generator
-        return kwargs
     def _apply_cfg(self, model_output: torch.Tensor, guidance_scale: float) -> torch.Tensor:
         if guidance_scale <= 1.0:
@@ -407,6 +395,7 @@ class DiTMoEPipeline(DiffusionPipeline):
                     timestep_batch[:batch_size] if do_cfg else timestep_batch,
                     latents_cfg,
                     next_timestep=next_timestep,
                 ).prev_sample
                 latents = step_output if not do_cfg else torch.cat([step_output, step_output], dim=0)
             latents = latents[:batch_size]

+"""Hub custom pipeline: DiTMoEPipeline.
+Load with native Hugging Face diffusers and trust_remote_code=True.
+"""
+from __future__ import annotations
 import importlib
 import inspect
 import json
 import sys
 from pathlib import Path
+from typing import Dict, List, Optional, Tuple, Union, Any
 import torch
     Each checkpoint keeps an English `id2label` map in `model_index.json` (DiT-style).
     """
+    @staticmethod
+    def prepare_extra_step_kwargs(
+        scheduler,
+        generator=None,
+        eta: float | None = None,
+    ):
+        kwargs = {}
+        step_params = set(inspect.signature(scheduler.step).parameters.keys())
+        if "generator" in step_params:
+            kwargs["generator"] = generator
+        if eta is not None and "eta" in step_params:
+            kwargs["eta"] = eta
+        return kwargs
     model_cpu_offload_seq = "transformer->vae"
     _optional_components = ["vae"]
         id2label_override = kwargs.pop("id2label", None)
         null_class_id_override = kwargs.pop("null_class_id", None)
         model_kwargs = dict(kwargs)
         inserted: List[str] = []
         def _load_component(folder: str, module_name: str, class_name: str):
             dtype=dtype,
         )
     def _apply_cfg(self, model_output: torch.Tensor, guidance_scale: float) -> torch.Tensor:
         if guidance_scale <= 1.0:
                     timestep_batch[:batch_size] if do_cfg else timestep_batch,
                     latents_cfg,
                     next_timestep=next_timestep,
+                    **extra_step_kwargs,
                 ).prev_sample
                 latents = step_output if not do_cfg else torch.cat([step_output, step_output], dim=0)
             latents = latents[:batch_size]

README.md CHANGED Viewed

@@ -9,9 +9,6 @@ tags:
   - class-conditional
   - dit-moe
 pipeline_tag: unconditional-image-generation
-widget:
-- output:
-    url: DiT-MoE-XL-8E2A/demo.png
 ---
 # DiT-MoE-diffusers

   - class-conditional
   - dit-moe
 pipeline_tag: unconditional-image-generation
 ---
 # DiT-MoE-diffusers