WilhelmT committed
Commit 8cc2ada · verified · 1 parent: 850f0a1

Upload folder using huggingface_hub

Files changed (3):
  1. config.json +7 -0
  2. model.safetensors +2 -2
  3. recipe.yaml +3 -1
config.json CHANGED
@@ -49,6 +49,13 @@
     "format": "pack-quantized",
     "global_compression_ratio": null,
     "ignore": [
+      "model.layers.15.self_attn.q_proj",
+      "model.layers.15.self_attn.k_proj",
+      "model.layers.15.self_attn.v_proj",
+      "model.layers.15.self_attn.o_proj",
+      "model.layers.15.mlp.gate_proj",
+      "model.layers.15.mlp.up_proj",
+      "model.layers.15.mlp.down_proj",
       "lm_head"
     ],
     "kv_cache_scheme": null,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e3fbfeaa7e699acebff42e60592ff02c72caab1d9ccf4f075c699ceccc2673c
-size 3029762904
+oid sha256:5f5c911ccb39c47ef38a1fef1852676782f5192ab2c6784cfda0f09ce72ba2d6
+size 3179183280
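
The checkpoint grows accordingly. A back-of-the-envelope check of the two pointer sizes (attributing the growth to layer 15 being left unquantized is an inference from this commit, not stated in it):

    # Sizes copied verbatim from the LFS pointers above.
    old_size = 3_029_762_904
    new_size = 3_179_183_280
    delta = new_size - old_size
    print(f"{delta:,} bytes (~{delta / 2**20:.0f} MiB)")  # 149,420,376 bytes (~142 MiB)
    # Roughly what keeping one decoder layer's Linear weights at 16-bit
    # instead of packed 4-bit costs.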
recipe.yaml CHANGED
@@ -2,7 +2,9 @@ default_stage:
   default_modifiers:
     GPTQModifier:
       targets: [Linear]
-      ignore: [lm_head]
+      ignore: [lm_head, model.layers.15.self_attn.q_proj, model.layers.15.self_attn.v_proj,
+        model.layers.15.self_attn.k_proj, model.layers.15.self_attn.o_proj, model.layers.15.mlp.gate_proj,
+        model.layers.15.mlp.up_proj, model.layers.15.mlp.down_proj]
       scheme: W4A16
       sequential_update: true
       block_size: 128
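
recipe.yaml is an llm-compressor recipe, so the same change can be reproduced by passing an equivalent GPTQModifier to a one-shot run. A sketch under assumptions: model, dataset, and output paths are placeholders, the oneshot import path differs across llm-compressor releases, and sequential_update is left to its default here:

    from llmcompressor import oneshot  # older releases: llmcompressor.transformers
    from llmcompressor.modifiers.quantization import GPTQModifier

    # Mirror the updated recipe: GPTQ all Linear layers to W4A16,
    # but skip lm_head and every projection in decoder layer 15.
    recipe = GPTQModifier(
        targets=["Linear"],
        scheme="W4A16",
        block_size=128,
        ignore=[
            "lm_head",
            "model.layers.15.self_attn.q_proj",
            "model.layers.15.self_attn.k_proj",
            "model.layers.15.self_attn.v_proj",
            "model.layers.15.self_attn.o_proj",
            "model.layers.15.mlp.gate_proj",
            "model.layers.15.mlp.up_proj",
            "model.layers.15.mlp.down_proj",
        ],
    )

    oneshot(
        model="path/to/base-model",   # placeholder
        dataset="open_platypus",      # placeholder calibration set
        recipe=recipe,
        output_dir="path/to/output",  # placeholder
    )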