Omdano
/

INT8-H16P

Omdano committed on Oct 5, 2025

Commit

1209faf

verified ·

1 Parent(s): c7f451a

Add back TorchAO INT8 quantization_config for proper loading

Files changed (1) hide show

config.json CHANGED Viewed

@@ -23,6 +23,23 @@
   "pos_embed_rescale": 2.0,
   "pos_embed_shift": null,
   "proj_bias": true,
   "query_bias": true,
   "rope_theta": 100.0,
   "torch_dtype": "bfloat16",

   "pos_embed_rescale": 2.0,
   "pos_embed_shift": null,
   "proj_bias": true,
+  "quantization_config": {
+    "include_input_output_embeddings": false,
+    "modules_to_not_convert": null,
+    "quant_method": "torchao",
+    "quant_type": {
+      "default": {
+        "_data": {
+          "group_size": null,
+          "set_inductor_config": true
+        },
+        "_type": "Int8WeightOnlyConfig",
+        "_version": 1
+      }
+    },
+    "quant_type_kwargs": {},
+    "untie_embedding_weights": false
+  },
   "query_bias": true,
   "rope_theta": 100.0,
   "torch_dtype": "bfloat16",