Omdano committed
Commit e020402 · verified · 1 parent: 8eada2c

Upload folder using huggingface_hub

Files changed (2)
  1. config.json +16 -14
  2. pytorch_model.bin +3 -0
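
The commit message indicates the folder was pushed with huggingface_hub. A minimal sketch of how such an upload is typically done with upload_folder; the repo id and local path below are hypothetical placeholders, not taken from this commit:

# Sketch: push a local model folder (config.json, pytorch_model.bin, ...) to the Hub.
# repo_id and folder_path are hypothetical placeholders.
from huggingface_hub import upload_folder

upload_folder(
    repo_id="Omdano/model-int8-torchao",
    folder_path="./quantized_model",
    commit_message="Upload folder using huggingface_hub",
)

Files above the Hub's LFS threshold, such as the weight binary, are stored as Git LFS objects, which is why pytorch_model.bin appears below as an LFS pointer.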
config.json CHANGED
@@ -24,23 +24,25 @@
   "pos_embed_shift": null,
   "proj_bias": true,
   "quantization_config": {
-    "_load_in_4bit": false,
-    "_load_in_8bit": true,
-    "bnb_4bit_compute_dtype": "float32",
-    "bnb_4bit_quant_storage": "uint8",
-    "bnb_4bit_quant_type": "fp4",
-    "bnb_4bit_use_double_quant": false,
-    "llm_int8_enable_fp32_cpu_offload": false,
-    "llm_int8_has_fp16_weight": false,
-    "llm_int8_skip_modules": null,
-    "llm_int8_threshold": 6.0,
-    "load_in_4bit": false,
-    "load_in_8bit": true,
-    "quant_method": "bitsandbytes"
+    "include_input_output_embeddings": false,
+    "modules_to_not_convert": null,
+    "quant_method": "torchao",
+    "quant_type": {
+      "default": {
+        "_data": {
+          "group_size": null,
+          "set_inductor_config": true
+        },
+        "_type": "Int8WeightOnlyConfig",
+        "_version": 1
+      }
+    },
+    "quant_type_kwargs": {},
+    "untie_embedding_weights": false
   },
   "query_bias": true,
   "rope_theta": 100.0,
-  "torch_dtype": "float16",
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.56.0.dev0",
   "use_gated_mlp": true,
   "value_bias": true
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ddfb990ead971cf482cf831299fb0ddc61895606e6d2d2daff52ad4607b7a37
+size 848027774
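
pytorch_model.bin is committed as a Git LFS pointer: the repository itself stores only the object id (sha256) and size, while the ~848 MB weight file lives in LFS storage. A small sketch of checking a downloaded copy against the pointer's oid; the repo id is a hypothetical placeholder:

# Sketch: verify a downloaded pytorch_model.bin against the sha256 oid in the LFS pointer.
# "Omdano/model-int8-torchao" is a hypothetical placeholder repo id.
import hashlib
from huggingface_hub import hf_hub_download

EXPECTED_OID = "5ddfb990ead971cf482cf831299fb0ddc61895606e6d2d2daff52ad4607b7a37"

path = hf_hub_download(repo_id="Omdano/model-int8-torchao", filename="pytorch_model.bin")

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha256.update(chunk)

assert sha256.hexdigest() == EXPECTED_OID, "checksum mismatch"
print("OK:", path)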