Omdano
/

INT8-H16P

Omdano committed on Oct 5, 2025

Commit

c68d969

verified ·

1 Parent(s): 29acac0

Re-quantize with float16 for T4 GPU compatibility

Files changed (2) hide show

config.json CHANGED Viewed

@@ -23,26 +23,9 @@
   "pos_embed_rescale": 2.0,
   "pos_embed_shift": null,
   "proj_bias": true,
-  "quantization_config": {
-    "include_input_output_embeddings": false,
-    "modules_to_not_convert": null,
-    "quant_method": "torchao",
-    "quant_type": {
-      "default": {
-        "_data": {
-          "group_size": null,
-          "set_inductor_config": true
-        },
-        "_type": "Int8WeightOnlyConfig",
-        "_version": 1
-      }
-    },
-    "quant_type_kwargs": {},
-    "untie_embedding_weights": false
-  },
   "query_bias": true,
   "rope_theta": 100.0,
-  "torch_dtype": "bfloat16",
   "transformers_version": "4.56.0.dev0",
   "use_gated_mlp": true,
   "value_bias": true

   "pos_embed_rescale": 2.0,
   "pos_embed_shift": null,
   "proj_bias": true,
   "query_bias": true,
   "rope_theta": 100.0,
+  "torch_dtype": "float16",
   "transformers_version": "4.56.0.dev0",
   "use_gated_mlp": true,
   "value_bias": true

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ddfb990ead971cf482cf831299fb0ddc61895606e6d2d2daff52ad4607b7a37
-size 848027774

 version https://git-lfs.github.com/spec/v1
+oid sha256:2da795bf7efefbcaea79b7c891021a833aaa016808ea8ba17bf3db267ca27225
+size 848011582