JiYeaha committed on
Commit
8b120a8
·
verified ·
1 Parent(s): 6b62e81

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. model.safetensors +1 -1
  2. recipe.yaml +13 -6
  3. tokenizer.json +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7db39ec8451954e0907d096e0cf1b50bbef5570d4b053e256673b4c3260be691
3
  size 1909656064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcce99c37b6894f580400d2d87aa76ec25c2a7b12d7b60a3cc689fcd4e1523d9
3
  size 1909656064
recipe.yaml CHANGED
@@ -1,14 +1,18 @@
1
  default_stage:
2
  default_modifiers:
3
  SmoothQuantModifier:
4
- smoothing_strength: 0.8
5
  mappings:
6
- - - - .*norm.*
7
- - [.*(q|k|v)_proj]
 
 
 
 
8
  ignore: []
9
- QuantizationModifier:
10
  targets: [Linear]
11
- ignore: [embed_tokens, lm_head, '']
12
  scheme: W8A8
13
  kv_cache_scheme:
14
  num_bits: 4
@@ -23,4 +27,7 @@ default_stage:
23
  zp_dtype: null
24
  observer: minmax
25
  observer_kwargs: {}
26
- observer: {weights: minmax, input: minmax}
 
 
 
 
1
  default_stage:
2
  default_modifiers:
3
  SmoothQuantModifier:
4
+ smoothing_strength: 0.5
5
  mappings:
6
+ - - - re:.*ln_f
7
+ - [.*proj]
8
+ - - - re:.*attention_norm
9
+ - [.*q_proj, .*k_proj, .*v_proj]
10
+ - - - re:.*ffn_norm
11
+ - [.*gate_proj, .*up_proj]
12
  ignore: []
13
+ GPTQModifier:
14
  targets: [Linear]
15
+ ignore: [embed_tokens, lm_head]
16
  scheme: W8A8
17
  kv_cache_scheme:
18
  num_bits: 4
 
27
  zp_dtype: null
28
  observer: minmax
29
  observer_kwargs: {}
30
+ block_size: 128
31
+ dampening_frac: 0.01
32
+ actorder: static
33
+ offload_hessians: false
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 512,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 1024,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },