nm-testing
/

tinyllama-oneshot-w8a8-dynamic-token-v2

@@ -7,48 +7,46 @@
   "attention_dropout": 0.0,
   "bos_token_id": 1,
   "compression_config": {
-    "quantization_config": {
-      "config_groups": {
-        "group_0": {
-          "input_activations": {
-            "block_structure": null,
-            "dynamic": true,
-            "group_size": null,
-            "num_bits": 8,
-            "observer": "memoryless",
-            "observer_kwargs": {},
-            "strategy": "token",
-            "symmetric": true,
-            "type": "int"
-          },
-          "output_activations": null,
-          "targets": [
-            "Linear"
-          ],
-          "weights": {
-            "block_structure": null,
-            "dynamic": false,
-            "group_size": null,
-            "num_bits": 8,
-            "observer": "minmax",
-            "observer_kwargs": {},
-            "strategy": "tensor",
-            "symmetric": true,
-            "type": "int"
-          }
         }
-      },
-      "format": "int-quantized",
-      "global_compression_ratio": 1.2391304140415598,
-      "ignore": [
-        "lm_head"
-      ],
-      "quant_method": "sparseml",
-      "quantization_status": "frozen"
     },
     "sparsity_config": {
       "format": "dense",
-      "global_sparsity": 7.819320427273134,
       "registry_requires_subclass": false,
       "sparsity_structure": "unstructured"
     }
@@ -68,7 +66,7 @@
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
   "transformers_version": "4.40.0",
   "use_cache": true,
   "vocab_size": 32000

   "attention_dropout": 0.0,
   "bos_token_id": 1,
   "compression_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": {
+          "block_structure": null,
+          "dynamic": true,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "memoryless",
+          "observer_kwargs": {},
+          "strategy": "token",
+          "symmetric": true,
+          "type": "int"
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor",
+          "symmetric": true,
+          "type": "int"
         }
+      }
     },
+    "format": "int-quantized",
+    "global_compression_ratio": 1.2391304140415598,
+    "ignore": [
+      "lm_head"
+    ],
+    "quant_method": "compressed-tensors",
+    "quantization_status": "frozen",
     "sparsity_config": {
       "format": "dense",
+      "global_sparsity": 7.826375935115232,
       "registry_requires_subclass": false,
       "sparsity_structure": "unstructured"
     }
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
+  "torch_dtype": "float16",
   "transformers_version": "4.40.0",
   "use_cache": true,
   "vocab_size": 32000

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:416a7f8569151f839a863e5d4a8424f784e97dcdf77fd074aabdf46b61c10cd9
-size 1231252556

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b77c5cda8b7af33e238158ceadff4e01d904b0ef3e66c19cb04fa6f628b35a1
+size 1231252356