Upload folder using huggingface_hub

Files changed (7) hide show

config.json CHANGED Viewed

@@ -36,9 +36,11 @@
           "num_bits": 4,
           "observer": "memoryless_minmax",
           "observer_kwargs": {},
           "strategy": "group",
-          "symmetric": true,
-          "type": "int"
         }
       }
     },

           "num_bits": 4,
           "observer": "memoryless_minmax",
           "observer_kwargs": {},
+          "scale_dtype": null,
           "strategy": "group",
+          "symmetric": false,
+          "type": "int",
+          "zp_dtype": "torch.int8"
         }
       }
     },

model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4b6705eaecf338922cba8daea69fce84ff315c225cba3a0a968656229810bc2
-size 4992538160

 version https://git-lfs.github.com/spec/v1
+oid sha256:e574c206e93c42a13ce226c8d125896933572ba7b72e3028503f1231525444f7
+size 4966132840

model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45b281b6559df36c0e590901a15e1a7a1e82250020c9047a40db8852abbb1d1f
-size 4977002992

 version https://git-lfs.github.com/spec/v1
+oid sha256:00e58997ec624729d00bd922983c1bccd02ab7a869bf07eef38145acbc2f6744
+size 4962450680

model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d0843afa21724351b9520411ba0e5f06837c25b594399c5b74d347607c183695
-size 4038693632

 version https://git-lfs.github.com/spec/v1
+oid sha256:4334851eef580bc70403fa2326dbfa0edb869ae23e16d744c083bdca68f5818d
+size 4161978264

model.safetensors.index.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

recipe.yaml CHANGED Viewed

@@ -1,7 +1,22 @@
 default_stage:
   default_modifiers:
-    QuantizationModifier:
       targets: [Linear]
       ignore: [lm_head]
-      scheme: W4A16
       bypass_divisibility_checks: false

 default_stage:
   default_modifiers:
+    AWQModifier:
       targets: [Linear]
       ignore: [lm_head]
+      scheme: W4A16_ASYM
       bypass_divisibility_checks: false
+      mappings:
+      - smooth_layer: re:.*input_layernorm
+        balance_layers: ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
+        activation_hook_target: null
+      - smooth_layer: re:.*v_proj
+        balance_layers: ['re:.*o_proj']
+        activation_hook_target: null
+      - smooth_layer: re:.*post_attention_layernorm
+        balance_layers: ['re:.*gate_proj', 're:.*up_proj']
+        activation_hook_target: null
+      - smooth_layer: re:.*up_proj
+        balance_layers: ['re:.*down_proj']
+        activation_hook_target: null
+      duo_scaling: true
+      n_grid: 20

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d869676e23a78b395eb0308cff52fd9c3ea7d52597627360cdb97407bb0b02b8
-size 15783075

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d6fc6c24a257545cb8bc93f6da21ac69148cc5c36c5d3fd00eaaabf8facec17
+size 15783173