Omdano committed on
Commit
c68d969
·
verified ·
1 Parent(s): 29acac0

Re-quantize with float16 for T4 GPU compatibility

Browse files
Files changed (2) hide show
  1. config.json +1 -18
  2. pytorch_model.bin +2 -2
config.json CHANGED
@@ -23,26 +23,9 @@
23
  "pos_embed_rescale": 2.0,
24
  "pos_embed_shift": null,
25
  "proj_bias": true,
26
- "quantization_config": {
27
- "include_input_output_embeddings": false,
28
- "modules_to_not_convert": null,
29
- "quant_method": "torchao",
30
- "quant_type": {
31
- "default": {
32
- "_data": {
33
- "group_size": null,
34
- "set_inductor_config": true
35
- },
36
- "_type": "Int8WeightOnlyConfig",
37
- "_version": 1
38
- }
39
- },
40
- "quant_type_kwargs": {},
41
- "untie_embedding_weights": false
42
- },
43
  "query_bias": true,
44
  "rope_theta": 100.0,
45
- "torch_dtype": "bfloat16",
46
  "transformers_version": "4.56.0.dev0",
47
  "use_gated_mlp": true,
48
  "value_bias": true
 
23
  "pos_embed_rescale": 2.0,
24
  "pos_embed_shift": null,
25
  "proj_bias": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "query_bias": true,
27
  "rope_theta": 100.0,
28
+ "torch_dtype": "float16",
29
  "transformers_version": "4.56.0.dev0",
30
  "use_gated_mlp": true,
31
  "value_bias": true
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ddfb990ead971cf482cf831299fb0ddc61895606e6d2d2daff52ad4607b7a37
3
- size 848027774
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2da795bf7efefbcaea79b7c891021a833aaa016808ea8ba17bf3db267ca27225
3
+ size 848011582