Upload Ouro-2.6B_smoothquant_W8A8 with bundled source code
Browse files
config.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
"attention_dropout": 0.0,
|
| 6 |
"auto_map": {
|
| 7 |
"AutoConfig": "configuration_ouro.OuroConfig",
|
| 8 |
-
"AutoModelForCausalLM": "
|
| 9 |
},
|
| 10 |
"bos_token_id": 1,
|
| 11 |
"early_exit_threshold": 1.0,
|
|
|
|
| 5 |
"attention_dropout": 0.0,
|
| 6 |
"auto_map": {
|
| 7 |
"AutoConfig": "configuration_ouro.OuroConfig",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_qouro.OuroForCausalLMQuantized"
|
| 9 |
},
|
| 10 |
"bos_token_id": 1,
|
| 11 |
"early_exit_threshold": 1.0,
|
modeling_qouro.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from qouro_runtime.modeling_qouro import OuroForCausalLMQuantized as OuroForCausalLMQuantized
|
qouro_runtime/quantization/__pycache__/pipeline.cpython-312.pyc
CHANGED
|
Binary files a/qouro_runtime/quantization/__pycache__/pipeline.cpython-312.pyc and b/qouro_runtime/quantization/__pycache__/pipeline.cpython-312.pyc differ
|
|
|
qouro_runtime/quantization/pipeline.py
CHANGED
|
@@ -142,8 +142,10 @@ def run_quantization_pipeline(
|
|
| 142 |
quantized_config.quantization = quant_config_dict
|
| 143 |
quantized_config.architectures = ["OuroForCausalLMQuantized"]
|
| 144 |
quantized_config.auto_map = {
|
| 145 |
-
|
| 146 |
-
"
|
|
|
|
|
|
|
| 147 |
}
|
| 148 |
|
| 149 |
quantized_model = OuroForCausalLMQuantized(quantized_config)
|
|
|
|
| 142 |
quantized_config.quantization = quant_config_dict
|
| 143 |
quantized_config.architectures = ["OuroForCausalLMQuantized"]
|
| 144 |
quantized_config.auto_map = {
|
| 145 |
+
# AutoModel also uses the module_file.ClassName form (a modeling_qouro.py wrapper file exists in the repository root)
|
| 146 |
+
"AutoModelForCausalLM": "modeling_qouro.OuroForCausalLMQuantized",
|
| 147 |
+
# AutoConfig uses the module_file.ClassName form (configuration_ouro.py exists in the repository root)
|
| 148 |
+
"AutoConfig": "configuration_ouro.OuroConfig",
|
| 149 |
}
|
| 150 |
|
| 151 |
quantized_model = OuroForCausalLMQuantized(quantized_config)
|