mgoin committed on
Commit
a2f09f9
·
verified ·
1 Parent(s): 37336cc

Updated compression_config to quantization_config

Browse files
Files changed (1) hide show
  1. config.json +31 -31
config.json CHANGED
@@ -16,36 +16,6 @@
16
  "blocksparse_triton_kernel_block_size": 64,
17
  "blocksparse_vert_stride": 8,
18
  "bos_token_id": 100257,
19
- "compression_config": {
20
- "config_groups": {
21
- "group_0": {
22
- "input_activations": null,
23
- "output_activations": null,
24
- "targets": [
25
- "Linear"
26
- ],
27
- "weights": {
28
- "block_structure": null,
29
- "dynamic": false,
30
- "group_size": null,
31
- "num_bits": 8,
32
- "observer": "minmax",
33
- "observer_kwargs": {},
34
- "strategy": "channel",
35
- "symmetric": true,
36
- "type": "int"
37
- }
38
- }
39
- },
40
- "format": "pack-quantized",
41
- "global_compression_ratio": 1.3018502182274538,
42
- "ignore": [
43
- "lm_head"
44
- ],
45
- "kv_cache_scheme": null,
46
- "quant_method": "compressed-tensors",
47
- "quantization_status": "frozen"
48
- },
49
  "dense_attention_every_n_layers": 2,
50
  "dummy_token_indices": [
51
  100256,
@@ -306,5 +276,35 @@
306
  "torch_dtype": "bfloat16",
307
  "transformers_version": "4.44.0",
308
  "use_cache": true,
309
- "vocab_size": 100352
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  }
 
16
  "blocksparse_triton_kernel_block_size": 64,
17
  "blocksparse_vert_stride": 8,
18
  "bos_token_id": 100257,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "dense_attention_every_n_layers": 2,
20
  "dummy_token_indices": [
21
  100256,
 
276
  "torch_dtype": "bfloat16",
277
  "transformers_version": "4.44.0",
278
  "use_cache": true,
279
+ "vocab_size": 100352,
280
+ "quantization_config": {
281
+ "config_groups": {
282
+ "group_0": {
283
+ "input_activations": null,
284
+ "output_activations": null,
285
+ "targets": [
286
+ "Linear"
287
+ ],
288
+ "weights": {
289
+ "block_structure": null,
290
+ "dynamic": false,
291
+ "group_size": null,
292
+ "num_bits": 8,
293
+ "observer": "minmax",
294
+ "observer_kwargs": {},
295
+ "strategy": "channel",
296
+ "symmetric": true,
297
+ "type": "int"
298
+ }
299
+ }
300
+ },
301
+ "format": "pack-quantized",
302
+ "global_compression_ratio": 1.3018502182274538,
303
+ "ignore": [
304
+ "lm_head"
305
+ ],
306
+ "kv_cache_scheme": null,
307
+ "quant_method": "compressed-tensors",
308
+ "quantization_status": "frozen"
309
+ }
310
  }