Upload folder using huggingface_hub

Files changed (4) hide show

yi-34b-tp2-awq/config.json CHANGED Viewed

@@ -1,14 +1,17 @@
 {
   "builder_config": {
-    "fp8": false,
     "hidden_act": "silu",
     "hidden_size": 7168,
-    "int8": false,
     "max_batch_size": 24,
     "max_input_len": 3500,
     "max_num_tokens": null,
     "max_output_len": 1000,
     "max_position_embeddings": 4096,
     "name": "llama",
     "num_heads": 56,
     "num_kv_heads": 8,
@@ -24,13 +27,15 @@
   "plugin_config": {
     "attention_qk_half_accumulation": false,
     "bert_attention_plugin": false,
-    "context_fmha_type": 1,
     "gemm_plugin": "float16",
     "gpt_attention_plugin": "float16",
     "identity_plugin": false,
     "layernorm_plugin": false,
     "layernorm_quantization_plugin": false,
     "lookup_plugin": false,
     "nccl_plugin": "float16",
     "paged_kv_cache": true,
     "quantize_per_token_plugin": false,
@@ -39,8 +44,10 @@
     "rmsnorm_plugin": false,
     "rmsnorm_quantization_plugin": false,
     "smooth_quant_gemm_plugin": false,
-    "tokens_per_block": 64,
     "use_custom_all_reduce": false,
     "weight_only_groupwise_quant_matmul_plugin": "float16",
     "weight_only_quant_matmul_plugin": false
   }

 {
   "builder_config": {
+    "gather_all_token_logits": false,
     "hidden_act": "silu",
     "hidden_size": 7168,
+    "int8": true,
+    "lora_target_modules": [],
     "max_batch_size": 24,
+    "max_beam_width": 1,
     "max_input_len": 3500,
     "max_num_tokens": null,
     "max_output_len": 1000,
     "max_position_embeddings": 4096,
+    "max_prompt_embedding_table_size": 0,
     "name": "llama",
     "num_heads": 56,
     "num_kv_heads": 8,
   "plugin_config": {
     "attention_qk_half_accumulation": false,
     "bert_attention_plugin": false,
+    "context_fmha_type": 0,
     "gemm_plugin": "float16",
     "gpt_attention_plugin": "float16",
     "identity_plugin": false,
     "layernorm_plugin": false,
     "layernorm_quantization_plugin": false,
     "lookup_plugin": false,
+    "lora_plugin": false,
+    "multi_block_mode": false,
     "nccl_plugin": "float16",
     "paged_kv_cache": true,
     "quantize_per_token_plugin": false,
     "rmsnorm_plugin": false,
     "rmsnorm_quantization_plugin": false,
     "smooth_quant_gemm_plugin": false,
+    "tokens_per_block": 128,
+    "use_context_fmha_for_generation": false,
     "use_custom_all_reduce": false,
+    "use_paged_context_fmha": false,
     "weight_only_groupwise_quant_matmul_plugin": "float16",
     "weight_only_quant_matmul_plugin": false
   }

yi-34b-tp2-awq/llama_float16_tp2_rank0.engine CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d189be8c8d03648abf44ec18af59312921f42e9df4ad76417066068cc26adee
-size 9681099628

 version https://git-lfs.github.com/spec/v1
+oid sha256:6db27e514a00d42f74be833f4c93aed531ad2cc0f1f48df0d922c5411bed674b
+size 9681139644

yi-34b-tp2-awq/llama_float16_tp2_rank1.engine CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e433ba1d599675c63d0b1653627ce9e827b7097cec8038cf8a5bc2afe2711c30
-size 9681099628

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea0a03899eee67eb856d55d1182839cff74804c0083390e47fe3d079db13efe2
+size 9681139644

yi-34b-tp2-awq/model.cache CHANGED Viewed

Binary files a/yi-34b-tp2-awq/model.cache and b/yi-34b-tp2-awq/model.cache differ