Spaces:
Running
on
Zero
Running
on
Zero
| # Copyright (c) 2025 NVIDIA CORPORATION. | |
| # Licensed under the MIT license. | |
| # Adapted from https://github.com/NVlabs/VILA/tree/main under the Apache 2.0 license. | |
| # LICENSE is in incl_licenses directory. | |
| from dataclasses import dataclass | |
| from transformers import PretrainedConfig | |
| class QuantizationConfig: | |
| quantize_model: str = "false" | |
| symm: bool = True | |
| epsilon: float = 1e-10 | |
| fabit: str = "E4M3" | |
| fwbit: str = "E4M3" | |
| bobit: str = "E5M2" | |
| row_blocksize: int = -1 | |
| col_blocksize: int = -1 | |
| qchoice: str = "none" | |
| pad_to_multiple_of: int = 0 | |
| def __init__( | |
| self, | |
| quantize_model, | |
| symm, | |
| epsilon, | |
| fabit, | |
| fwbit, | |
| bobit, | |
| row_blocksize, | |
| col_blocksize, | |
| qchoice, | |
| pad_to_multiple_of, | |
| **kwargs, | |
| ): | |
| super().__init__() | |
| self.quantize_model = quantize_model | |
| self.symm = symm | |
| self.epsilon = epsilon | |
| self.fabit = fabit | |
| self.fwbit = fwbit | |
| self.bobit = bobit | |
| self.row_blocksize = row_blocksize | |
| self.col_blocksize = col_blocksize | |
| self.qchoice = qchoice | |
| self.pad_to_multiple_of = pad_to_multiple_of | |
| # class QuantizationConfig(PretrainedConfig): | |
| # def __init__( | |
| # self, | |
| # quantize_model="false", | |
| # symm=True, | |
| # epsilon=1e-10, | |
| # fabit="E4M3", | |
| # fwbit="E4M3", | |
| # bobit="E5M2", | |
| # row_blocksize=-1, | |
| # col_blocksize=-1, | |
| # qchoice="none", | |
| # pad_to_multiple_of=0, | |
| # **kwargs, | |
| # ): | |
| # super().__init__() | |
| # self.quantize_model = quantize_model | |
| # self.symm = symm | |
| # self.epsilon = epsilon | |
| # self.fabit = fabit | |
| # self.fwbit = fwbit | |
| # self.bobit = bobit | |
| # self.row_blocksize = row_blocksize | |
| # self.col_blocksize = col_blocksize | |
| # self.qchoice = qchoice | |
| # self.pad_to_multiple_of = pad_to_multiple_of | |