YiYiXu
/

quant-block

Diffusers

Model card Files and versions

xet

Community

YiYiXu HF Staff committed on Jan 30

Commit

1181cf5

verified ·

1 Parent(s): cbd07bb

Update block.py

Browse files

Files changed (1) hide show

block.py +108 -51

block.py CHANGED Viewed

@@ -1,73 +1,130 @@
-from typing import List
 from diffusers.modular_pipelines import (
-    ComponentSpec,
     InputParam,
-    ModularPipelineBlocks,
     OutputParam,
     PipelineState,
 )
-class MyCustomBlock(ModularPipelineBlocks):
-    """
-    A custom block for [describe what your block does].
-    Replace this with your implementation.
-    """
     @property
     def description(self) -> str:
-        """Description of the block."""
-        return "A template custom block - replace with your description"
-    @property
-    def expected_components(self) -> List[ComponentSpec]:
-        """Define model components your block needs (e.g., transformers, VAEs)."""
-        return [
-            # Example:
-            # ComponentSpec(
-            #     name="model",
-            #     type_hint=SomeModelClass,
-            #     repo="organization/model-name",
-            # ),
-        ]
     @property
     def inputs(self) -> List[InputParam]:
-        """Define input parameters for your block."""
         return [
-            # Example:
-            # InputParam(
-            #     "prompt",
-            #     type_hint=str,
-            #     required=True,
-            #     description="Input prompt",
-            #     metadata={"mellon": "textbox"},  # For Mellon UI
-            # ),
         ]
     @property
-    def intermediate_outputs(self) -> List[OutputParam]:
-        """Define output parameters for your block."""
         return [
-            # Example:
-            # OutputParam(
-            #     "result",
-            #     type_hint=str,
-            #     description="Output result",
-            #     metadata={"mellon": "text"},  # For Mellon UI
-            # ),
         ]
-    def __call__(self, components, state: PipelineState) -> PipelineState:
-        """Execute your block logic."""
         block_state = self.get_block_state(state)
-        # Your implementation here
-        # Access inputs via block_state.<input_name>
-        # Set outputs via block_state.<output_name> = value
         self.set_block_state(state, block_state)
-        return components, state

+from typing import List, Optional
 from diffusers.modular_pipelines import (
     InputParam,
     OutputParam,
+    PipelineBlock,
     PipelineState,
 )
+class QuantizationConfigBlock(PipelineBlock):
+    """Block to create BitsAndBytes quantization config for model loading."""
     @property
     def description(self) -> str:
+        return "Creates a BitsAndBytes quantization config for loading models with reduced precision"
     @property
     def inputs(self) -> List[InputParam]:
         return [
+            # Target component
+            InputParam(
+                "component",
+                type_hint=str,
+                default="transformer",
+                description="Component name to apply quantization to",
+                metadata={"mellon": "dropdown"}
+            ),
+            # Bits selection
+            InputParam(
+                "quant_type",
+                type_hint=str,
+                default="bnb_4bit",
+                description="Quantization backend Type",
+                metadata={"mellon": "dropdown"},  # "options": ["bnb_4bit", "bnb_8bit"]
+            ),
+            # ===== 4-bit options =====
+            InputParam(
+                "bnb_4bit_quant_type",
+                type_hint=str,
+                default="nf4",
+                description="4-bit quantization type",
+                metadata={"mellon": "dropdown"},  # "options": ["nf4", "fp4"]
+            ),
+            InputParam(
+                "bnb_4bit_compute_dtype",
+                type_hint=Optional[str],
+                description="Compute dtype for 4-bit quantization",
+                metadata={"mellon": "dropdown"},  # "options": ["", "float32", "float16", "bfloat16"]
+            ),
+            InputParam(
+                "bnb_4bit_use_double_quant",
+                type_hint=bool,
+                default=False,
+                description="Use nested quantization (quantize the quantization constants)",
+                metadata={"mellon": "checkbox"}
+            ),
+            # ===== 8-bit options =====
+            InputParam(
+                "llm_int8_threshold",
+                type_hint=float,
+                default=6.0,
+                description="Outlier threshold for 8-bit quantization (values above this use fp16)",
+                metadata={"mellon": "slider"},
+            ),
+            InputParam(
+                "llm_int8_has_fp16_weight",
+                type_hint=bool,
+                default=False,
+                description="Keep weights in fp16 for 8-bit (useful for fine-tuning)",
+                metadata={"mellon": "checkbox"},
+            ),
+            InputParam(
+                "llm_int8_skip_modules",
+                type_hint=Optional[List[str]],
+            ),
         ]
     @property
+    def intermediates_outputs(self) -> List[OutputParam]:
         return [
+            OutputParam(
+                "quantization_config",
+                type_hint=dict,
+                description="Quantization config dict for load_components",
+            ),
         ]
+    def __call__(self, pipeline, state: PipelineState) -> PipelineState:
+        import torch
+        from diffusers import BitsAndBytesConfig
         block_state = self.get_block_state(state)
+        # Map string dtype to torch dtype
+        def str_to_dtype(dtype_str):
+            dtype_map = {
+                "": None,
+                "float32": torch.float32,
+                "float16": torch.float16,
+                "bfloat16": torch.bfloat16,
+                "uint8": torch.uint8,
+                "int8": torch.int8,
+                "float64": torch.float64,
+            }
+            return dtype_map.get(dtype_str, None)
+        if block_state.quant_type == "bnb_4bit":
+            config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_quant_type=block_state.bnb_4bit_quant_type,
+                bnb_4bit_compute_dtype=str_to_dtype(block_state.bnb_4bit_compute_dtype),
+                bnb_4bit_use_double_quant=block_state.bnb_4bit_use_double_quant,
+                llm_int8_skip_modules=block_state.llm_int8_skip_modules,
+            )
+        elif block_state.quant_type == "bnb_8bit":
+            config = BitsAndBytesConfig(
+                load_in_8bit=True,
+                llm_int8_threshold=block_state.llm_int8_threshold,
+                llm_int8_has_fp16_weight=block_state.llm_int8_has_fp16_weight,
+                llm_int8_skip_modules=block_state.llm_int8_skip_modules,
+            )
+        # Output as dict: {"transformer": config}
+        quantization_config = {block_state.component: config}
+        block_state.quantization_config = quantization_config
         self.set_block_state(state, block_state)
+        return pipeline, state