Spaces:
Runtime error
Runtime error
| from abc import ABC | |
| from typing import Optional | |
| from pydantic import BaseModel, Field | |
### DTO Definitions
class QuantizationConfig(ABC, BaseModel):
    """Abstract marker base shared by every quantization-configuration DTO.

    Carries no fields of its own; concrete subclasses (AWQ/GPTQ/GGUF) add
    their scheme-specific settings.
    """
class ConvertRequest(ABC, BaseModel):
    """Base DTO for a Hugging Face model conversion request.

    Subclasses attach a scheme-specific ``quantization_config`` field.
    """

    # Required: the Hugging Face model identifier to convert.
    hf_model_name: str
    hf_tokenizer_name: Optional[str] = Field(
        default=None,
        description="Hugging Face tokenizer name. Defaults to hf_model_name",
    )
    hf_token: Optional[str] = Field(
        default=None,
        description="Hugging Face token for private models",
    )
    hf_push_repo: Optional[str] = Field(
        default=None,
        description="Hugging Face repo to push the converted model. If not provided, the model will be downloaded only.",
    )
### -------
### Quantization Configurations
class AWQQuantizationConfig(QuantizationConfig):
    """Settings for AWQ (Activation-aware Weight Quantization)."""

    zero_point: Optional[bool] = Field(default=True, description="Use zero point quantization")
    q_group_size: Optional[int] = Field(default=128, description="Quantization group size")
    w_bit: Optional[int] = Field(default=4, description="Weight bit")
    version: Optional[str] = Field(default="GEMM", description="Quantization version")
class GPTQQuantizationConfig(QuantizationConfig):
    """Placeholder for GPTQ quantization settings (no options defined yet)."""
class GGUFQuantizationConfig(QuantizationConfig):
    """Placeholder for GGUF quantization settings (no options defined yet)."""
class AWQConvertionRequest(ConvertRequest):
    """Conversion request that produces an AWQ-quantized model.

    ``quantization_config`` defaults to a fresh ``AWQQuantizationConfig``
    (all AWQ defaults) when the caller omits it.
    """

    # The class itself is a zero-arg callable, so no lambda wrapper is needed
    # for default_factory.
    quantization_config: Optional[AWQQuantizationConfig] = Field(
        default_factory=AWQQuantizationConfig,
        description="AWQ quantization configuration",
    )
class GPTQConvertionRequest(ConvertRequest):
    """Conversion request that produces a GPTQ-quantized model.

    ``quantization_config`` defaults to a fresh ``GPTQQuantizationConfig``
    when the caller omits it.
    """

    # The class itself is a zero-arg callable, so no lambda wrapper is needed
    # for default_factory.
    quantization_config: Optional[GPTQQuantizationConfig] = Field(
        default_factory=GPTQQuantizationConfig,
        description="GPTQ quantization configuration",
    )
class GGUFConvertionRequest(ConvertRequest):
    """Conversion request that produces a GGUF-format model.

    ``quantization_config`` defaults to a fresh ``GGUFQuantizationConfig``
    when the caller omits it.
    """

    # The class itself is a zero-arg callable, so no lambda wrapper is needed
    # for default_factory.
    quantization_config: Optional[GGUFQuantizationConfig] = Field(
        default_factory=GGUFQuantizationConfig,
        description="GGUF quantization configuration",
    )
### -------