Upload folder using huggingface_hub

Files changed (3) hide show

config.json CHANGED Viewed

@@ -90,15 +90,6 @@
     "format": "int-quantized",
     "global_compression_ratio": null,
     "ignore": [
-      "model.layers.27.mlp.gate_proj",
-      "model.layers.27.mlp.up_proj",
-      "model.layers.27.mlp.down_proj",
-      "model.layers.28.mlp.gate_proj",
-      "model.layers.28.mlp.up_proj",
-      "model.layers.28.mlp.down_proj",
-      "model.layers.29.mlp.gate_proj",
-      "model.layers.29.mlp.up_proj",
-      "model.layers.29.mlp.down_proj",
       "lm_head"
     ],
     "kv_cache_scheme": {
@@ -106,7 +97,7 @@
       "block_structure": null,
       "dynamic": false,
       "group_size": null,
-      "num_bits": 8,
       "observer": "minmax",
       "observer_kwargs": {},
       "scale_dtype": null,

     "format": "int-quantized",
     "global_compression_ratio": null,
     "ignore": [
       "lm_head"
     ],
     "kv_cache_scheme": {
       "block_structure": null,
       "dynamic": false,
       "group_size": null,
+      "num_bits": 4,
       "observer": "minmax",
       "observer_kwargs": {},
       "scale_dtype": null,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da147ceac30e7a9680fd4ac87f8633e40eeba4a024659b4b5c34a94775440a37
-size 1985091120

 version https://git-lfs.github.com/spec/v1
+oid sha256:7db39ec8451954e0907d096e0cf1b50bbef5570d4b053e256673b4c3260be691
+size 1909656064

recipe.yaml CHANGED Viewed

@@ -6,15 +6,12 @@ default_stage:
       - - - .*norm.*
           - [.*(q|k|v)_proj]
       ignore: []
-    GPTQModifier:
       targets: [Linear]
-      ignore: [embed_tokens, lm_head, model.layers.27.mlp.gate_proj, model.layers.27.mlp.up_proj,
-        model.layers.27.mlp.down_proj, model.layers.28.mlp.gate_proj, model.layers.28.mlp.up_proj,
-        model.layers.28.mlp.down_proj, model.layers.29.mlp.gate_proj, model.layers.29.mlp.up_proj,
-        model.layers.29.mlp.down_proj]
       scheme: W8A8
       kv_cache_scheme:
-        num_bits: 8
         type: int
         symmetric: true
         group_size: null
@@ -27,7 +24,3 @@ default_stage:
         observer: minmax
         observer_kwargs: {}
       observer: {weights: minmax, input: minmax}
-      block_size: 128
-      dampening_frac: 0.01
-      actorder: static
-      offload_hessians: false

       - - - .*norm.*
           - [.*(q|k|v)_proj]
       ignore: []
+    QuantizationModifier:
       targets: [Linear]
+      ignore: [embed_tokens, lm_head, '']
       scheme: W8A8
       kv_cache_scheme:
+        num_bits: 4
         type: int
         symmetric: true
         group_size: null
         observer: minmax
         observer_kwargs: {}
       observer: {weights: minmax, input: minmax}