TheFloat16
/

Llama3-70b-Instruct-TRTLLM

Text Generation

Model card Files Files and versions

matichon committed on Jun 14, 2024

Commit

135bb9c

·

verified ·

1 Parent(s): 494657e

Create config.json

Files changed (1) hide show

config.json +28 -0

config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+    "producer": {
+        "name": "ammo",
+        "version": "0.7.4"
+    },
+    "architecture": "LlamaForCausalLM",
+    "dtype": "float16",
+    "num_hidden_layers": 80,
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "hidden_size": 8192,
+    "norm_epsilon": 1e-05,
+    "vocab_size": 128256,
+    "max_position_embeddings": 8192,
+    "hidden_act": "silu",
+    "use_parallel_embedding": true,
+    "embedding_sharding_dim": 0,
+    "quantization": {
+        "quant_algo": "W4A16_AWQ",
+        "kv_cache_quant_algo": "INT8",
+        "group_size": 64,
+        "has_zero_point": false,
+        "pre_quant_scale": true,
+        "exclude_modules": [
+            "lm_head"
+        ]
+    }
+}