Removing all cached artifacts prior to 0.3.0
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/b391179f2d4f2cc7a404.json +0 -1
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/meta-llama/Llama-3.1-8B-Instruct/a4e0275af090ae00d0f4.json +0 -1
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.28/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/8a31499781b39a1babbc.json +0 -1
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/7353b00efd1c2cf456a5.json +0 -48
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/dfdea472b85b5e1c1bc0.json +0 -48
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/ecd6582c85ac47fe17d8.json +0 -48
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/23c029b3504f98db0dae.json +0 -41
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/245ec3826de5d0d8c4f3.json +0 -41
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9adf62282ade09a4ee97.json +0 -41
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9e7df2d93403be65f917.json +0 -41
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/e8aeb35344c4c108f038.json +0 -41
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/21fede0703904ef6a5cc.json +0 -41
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/45d32f74efe639efa140.json +0 -41
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/5f89c0b1616392b66470.json +0 -41
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/9c4fc51180f9ddea7ebd.json +0 -41
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/b6f91fdddb08f318e31f.json +0 -41
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/00d8fe05e3026bef5097.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/1e02b59600f20e2b4809.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/5fab5c4bfd6cc5c466c0.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b4a848e7d155dba24978.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b8d797840e56152f0045.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/d23d95cf8617c46c6790.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e3aa9bba36ac779dc68b.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-13b-hf/15497d4569cb4aabb3a9.json +0 -39
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-7b-hf/d75e02defdb0d74b4773.json +0 -39
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/0893aa250f27c3bca5d9.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/26f4ee07b3f4c0422285.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-1B/178748f8e86d0180fe29.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-3B/a6b80f5e9df4129d8a64.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3-8B/3f50b3c04cf531d956ff.json +0 -39
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/08baac4331a38cf9b5c6.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/2ce651c4b0160df7b1a7.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/53917ac5a736440f6651.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/63ae9940e985694a6de1.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/6fd804b37216317e4f8e.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/8929a74a3f085f34acec.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/9580f944931f95eff7e8.json +0 -45
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/6845a4ab255499aced61.json +0 -39
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/7ad601d64b726cbb4ba6.json +0 -39
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/455574355aa6f6df8272.json +0 -36
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/9fcdfdf6fc4e018f496f.json +0 -36
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/a3def2b08381a583fdae.json +0 -36
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c37761e9cb8e4f9c854e.json +0 -36
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c7b792c009f3a8f9bd58.json +0 -36
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/fb824464b88d20b99ad4.json +0 -36
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/13c11b9170e3b3cb8544.json +0 -36
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/c0fe68f25c283c1998b1.json +0 -36
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/be253ef73d692f0acdde.json +0 -36
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ab22dc02d25b1a888451.json +0 -36
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/06ed075b59d7dee23809.json +0 -36
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/b391179f2d4f2cc7a404.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 5632, "max_position_embeddings": 2048, "mlp_bias": false, "model_type": "llama", "neuron": {"compiler_version": "2.17.194.0+d312836f", "input_specs": {"attention_mask": [2, 1024], "input_ids": [2, 1024], "labels": [2, 1024]}, "model_class": "PeftModelForCausalLM", "num_neuron_cores_per_node": 2, "pipeline_parallel_size": 1, "precision": "bfloat16", "tensor_parallel_size": 2, "training": true}, "num_attention_heads": 32, "num_hidden_layers": 22, "num_key_value_heads": 4, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/meta-llama/Llama-3.1-8B-Instruct/a4e0275af090ae00d0f4.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": [128001, 128008, 128009], "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "llama", "neuron": {"compiler_version": "2.17.194.0+d312836f", "input_specs": {"attention_mask": [2, 1024], "input_ids": [2, 1024], "labels": [2, 1024]}, "model_class": "PeftModelForCausalLM", "num_neuron_cores_per_node": 32, "pipeline_parallel_size": 1, "precision": "bfloat16", "tensor_parallel_size": 8, "training": true}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": {"factor": 8.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256}
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.28/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/8a31499781b39a1babbc.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"_attn_implementation_autoset": true, "architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 5632, "max_position_embeddings": 2048, "mlp_bias": false, "model_type": "llama", "neuron": {"compiler_version": "2.17.194.0+d312836f", "input_specs": {"attention_mask": [2, 1024], "input_ids": [2, 1024], "labels": [2, 1024]}, "model_class": "PeftModelForCausalLM", "num_neuron_cores_per_node": 2, "pipeline_parallel_size": 1, "precision": "bfloat16", "tensor_parallel_size": 2, "training": true}, "num_attention_heads": 32, "num_hidden_layers": 22, "num_key_value_heads": 4, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/7353b00efd1c2cf456a5.json
DELETED
|
@@ -1,48 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "openai-community/gpt2",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"activation_function": "gelu_new",
|
| 6 |
-
"architectures": [
|
| 7 |
-
"GPT2LMHeadModel"
|
| 8 |
-
],
|
| 9 |
-
"attn_pdrop": 0.1,
|
| 10 |
-
"embd_pdrop": 0.1,
|
| 11 |
-
"initializer_range": 0.02,
|
| 12 |
-
"layer_norm_epsilon": 1e-05,
|
| 13 |
-
"model_type": "gpt2",
|
| 14 |
-
"n_ctx": 1024,
|
| 15 |
-
"n_embd": 768,
|
| 16 |
-
"n_head": 12,
|
| 17 |
-
"n_inner": null,
|
| 18 |
-
"n_layer": 12,
|
| 19 |
-
"n_positions": 1024,
|
| 20 |
-
"neuron": {
|
| 21 |
-
"auto_cast_type": "fp16",
|
| 22 |
-
"batch_size": 16,
|
| 23 |
-
"checkpoint_id": "openai-community/gpt2",
|
| 24 |
-
"checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e",
|
| 25 |
-
"compiler_type": "neuronx-cc",
|
| 26 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
-
"num_cores": 2,
|
| 28 |
-
"sequence_length": 1024,
|
| 29 |
-
"task": "text-generation"
|
| 30 |
-
},
|
| 31 |
-
"reorder_and_upcast_attn": false,
|
| 32 |
-
"resid_pdrop": 0.1,
|
| 33 |
-
"scale_attn_by_inverse_layer_idx": false,
|
| 34 |
-
"scale_attn_weights": true,
|
| 35 |
-
"summary_activation": null,
|
| 36 |
-
"summary_first_dropout": 0.1,
|
| 37 |
-
"summary_proj_to_labels": true,
|
| 38 |
-
"summary_type": "cls_index",
|
| 39 |
-
"summary_use_proj": true,
|
| 40 |
-
"task_specific_params": {
|
| 41 |
-
"text-generation": {
|
| 42 |
-
"do_sample": true,
|
| 43 |
-
"max_length": 50
|
| 44 |
-
}
|
| 45 |
-
},
|
| 46 |
-
"use_cache": true,
|
| 47 |
-
"vocab_size": 50257
|
| 48 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/dfdea472b85b5e1c1bc0.json
DELETED
|
@@ -1,48 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "openai-community/gpt2",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"activation_function": "gelu_new",
|
| 6 |
-
"architectures": [
|
| 7 |
-
"GPT2LMHeadModel"
|
| 8 |
-
],
|
| 9 |
-
"attn_pdrop": 0.1,
|
| 10 |
-
"embd_pdrop": 0.1,
|
| 11 |
-
"initializer_range": 0.02,
|
| 12 |
-
"layer_norm_epsilon": 1e-05,
|
| 13 |
-
"model_type": "gpt2",
|
| 14 |
-
"n_ctx": 1024,
|
| 15 |
-
"n_embd": 768,
|
| 16 |
-
"n_head": 12,
|
| 17 |
-
"n_inner": null,
|
| 18 |
-
"n_layer": 12,
|
| 19 |
-
"n_positions": 1024,
|
| 20 |
-
"neuron": {
|
| 21 |
-
"auto_cast_type": "fp16",
|
| 22 |
-
"batch_size": 4,
|
| 23 |
-
"checkpoint_id": "openai-community/gpt2",
|
| 24 |
-
"checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e",
|
| 25 |
-
"compiler_type": "neuronx-cc",
|
| 26 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
-
"num_cores": 2,
|
| 28 |
-
"sequence_length": 1024,
|
| 29 |
-
"task": "text-generation"
|
| 30 |
-
},
|
| 31 |
-
"reorder_and_upcast_attn": false,
|
| 32 |
-
"resid_pdrop": 0.1,
|
| 33 |
-
"scale_attn_by_inverse_layer_idx": false,
|
| 34 |
-
"scale_attn_weights": true,
|
| 35 |
-
"summary_activation": null,
|
| 36 |
-
"summary_first_dropout": 0.1,
|
| 37 |
-
"summary_proj_to_labels": true,
|
| 38 |
-
"summary_type": "cls_index",
|
| 39 |
-
"summary_use_proj": true,
|
| 40 |
-
"task_specific_params": {
|
| 41 |
-
"text-generation": {
|
| 42 |
-
"do_sample": true,
|
| 43 |
-
"max_length": 50
|
| 44 |
-
}
|
| 45 |
-
},
|
| 46 |
-
"use_cache": true,
|
| 47 |
-
"vocab_size": 50257
|
| 48 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/ecd6582c85ac47fe17d8.json
DELETED
|
@@ -1,48 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "openai-community/gpt2",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"activation_function": "gelu_new",
|
| 6 |
-
"architectures": [
|
| 7 |
-
"GPT2LMHeadModel"
|
| 8 |
-
],
|
| 9 |
-
"attn_pdrop": 0.1,
|
| 10 |
-
"embd_pdrop": 0.1,
|
| 11 |
-
"initializer_range": 0.02,
|
| 12 |
-
"layer_norm_epsilon": 1e-05,
|
| 13 |
-
"model_type": "gpt2",
|
| 14 |
-
"n_ctx": 1024,
|
| 15 |
-
"n_embd": 768,
|
| 16 |
-
"n_head": 12,
|
| 17 |
-
"n_inner": null,
|
| 18 |
-
"n_layer": 12,
|
| 19 |
-
"n_positions": 1024,
|
| 20 |
-
"neuron": {
|
| 21 |
-
"auto_cast_type": "fp16",
|
| 22 |
-
"batch_size": 1,
|
| 23 |
-
"checkpoint_id": "openai-community/gpt2",
|
| 24 |
-
"checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e",
|
| 25 |
-
"compiler_type": "neuronx-cc",
|
| 26 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
-
"num_cores": 2,
|
| 28 |
-
"sequence_length": 1024,
|
| 29 |
-
"task": "text-generation"
|
| 30 |
-
},
|
| 31 |
-
"reorder_and_upcast_attn": false,
|
| 32 |
-
"resid_pdrop": 0.1,
|
| 33 |
-
"scale_attn_by_inverse_layer_idx": false,
|
| 34 |
-
"scale_attn_weights": true,
|
| 35 |
-
"summary_activation": null,
|
| 36 |
-
"summary_first_dropout": 0.1,
|
| 37 |
-
"summary_proj_to_labels": true,
|
| 38 |
-
"summary_type": "cls_index",
|
| 39 |
-
"summary_use_proj": true,
|
| 40 |
-
"task_specific_params": {
|
| 41 |
-
"text-generation": {
|
| 42 |
-
"do_sample": true,
|
| 43 |
-
"max_length": 50
|
| 44 |
-
}
|
| 45 |
-
},
|
| 46 |
-
"use_cache": true,
|
| 47 |
-
"vocab_size": 50257
|
| 48 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/23c029b3504f98db0dae.json
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"GraniteForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.1,
|
| 10 |
-
"attention_multiplier": 0.015625,
|
| 11 |
-
"embedding_multiplier": 12.0,
|
| 12 |
-
"hidden_act": "silu",
|
| 13 |
-
"hidden_size": 2048,
|
| 14 |
-
"initializer_range": 0.02,
|
| 15 |
-
"intermediate_size": 8192,
|
| 16 |
-
"logits_scaling": 8.0,
|
| 17 |
-
"max_position_embeddings": 131072,
|
| 18 |
-
"mlp_bias": false,
|
| 19 |
-
"model_type": "granite",
|
| 20 |
-
"neuron": {
|
| 21 |
-
"auto_cast_type": "bf16",
|
| 22 |
-
"batch_size": 1,
|
| 23 |
-
"checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 24 |
-
"checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952",
|
| 25 |
-
"compiler_type": "neuronx-cc",
|
| 26 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
-
"num_cores": 2,
|
| 28 |
-
"sequence_length": 4096,
|
| 29 |
-
"task": "text-generation"
|
| 30 |
-
},
|
| 31 |
-
"num_attention_heads": 32,
|
| 32 |
-
"num_hidden_layers": 40,
|
| 33 |
-
"num_key_value_heads": 8,
|
| 34 |
-
"residual_multiplier": 0.22,
|
| 35 |
-
"rms_norm_eps": 1e-05,
|
| 36 |
-
"rope_scaling": null,
|
| 37 |
-
"rope_theta": 5000000.0,
|
| 38 |
-
"tie_word_embeddings": true,
|
| 39 |
-
"use_cache": true,
|
| 40 |
-
"vocab_size": 49155
|
| 41 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/245ec3826de5d0d8c4f3.json
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"GraniteForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.1,
|
| 10 |
-
"attention_multiplier": 0.015625,
|
| 11 |
-
"embedding_multiplier": 12.0,
|
| 12 |
-
"hidden_act": "silu",
|
| 13 |
-
"hidden_size": 2048,
|
| 14 |
-
"initializer_range": 0.02,
|
| 15 |
-
"intermediate_size": 8192,
|
| 16 |
-
"logits_scaling": 8.0,
|
| 17 |
-
"max_position_embeddings": 131072,
|
| 18 |
-
"mlp_bias": false,
|
| 19 |
-
"model_type": "granite",
|
| 20 |
-
"neuron": {
|
| 21 |
-
"auto_cast_type": "bf16",
|
| 22 |
-
"batch_size": 8,
|
| 23 |
-
"checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 24 |
-
"checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952",
|
| 25 |
-
"compiler_type": "neuronx-cc",
|
| 26 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
-
"num_cores": 2,
|
| 28 |
-
"sequence_length": 4096,
|
| 29 |
-
"task": "text-generation"
|
| 30 |
-
},
|
| 31 |
-
"num_attention_heads": 32,
|
| 32 |
-
"num_hidden_layers": 40,
|
| 33 |
-
"num_key_value_heads": 8,
|
| 34 |
-
"residual_multiplier": 0.22,
|
| 35 |
-
"rms_norm_eps": 1e-05,
|
| 36 |
-
"rope_scaling": null,
|
| 37 |
-
"rope_theta": 5000000.0,
|
| 38 |
-
"tie_word_embeddings": true,
|
| 39 |
-
"use_cache": true,
|
| 40 |
-
"vocab_size": 49155
|
| 41 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9adf62282ade09a4ee97.json
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"GraniteForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.1,
|
| 10 |
-
"attention_multiplier": 0.015625,
|
| 11 |
-
"embedding_multiplier": 12.0,
|
| 12 |
-
"hidden_act": "silu",
|
| 13 |
-
"hidden_size": 2048,
|
| 14 |
-
"initializer_range": 0.02,
|
| 15 |
-
"intermediate_size": 8192,
|
| 16 |
-
"logits_scaling": 8.0,
|
| 17 |
-
"max_position_embeddings": 131072,
|
| 18 |
-
"mlp_bias": false,
|
| 19 |
-
"model_type": "granite",
|
| 20 |
-
"neuron": {
|
| 21 |
-
"auto_cast_type": "bf16",
|
| 22 |
-
"batch_size": 1,
|
| 23 |
-
"checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 24 |
-
"checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952",
|
| 25 |
-
"compiler_type": "neuronx-cc",
|
| 26 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
-
"num_cores": 8,
|
| 28 |
-
"sequence_length": 4096,
|
| 29 |
-
"task": "text-generation"
|
| 30 |
-
},
|
| 31 |
-
"num_attention_heads": 32,
|
| 32 |
-
"num_hidden_layers": 40,
|
| 33 |
-
"num_key_value_heads": 8,
|
| 34 |
-
"residual_multiplier": 0.22,
|
| 35 |
-
"rms_norm_eps": 1e-05,
|
| 36 |
-
"rope_scaling": null,
|
| 37 |
-
"rope_theta": 5000000.0,
|
| 38 |
-
"tie_word_embeddings": true,
|
| 39 |
-
"use_cache": true,
|
| 40 |
-
"vocab_size": 49155
|
| 41 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9e7df2d93403be65f917.json
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"GraniteForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.1,
|
| 10 |
-
"attention_multiplier": 0.015625,
|
| 11 |
-
"embedding_multiplier": 12.0,
|
| 12 |
-
"hidden_act": "silu",
|
| 13 |
-
"hidden_size": 2048,
|
| 14 |
-
"initializer_range": 0.02,
|
| 15 |
-
"intermediate_size": 8192,
|
| 16 |
-
"logits_scaling": 8.0,
|
| 17 |
-
"max_position_embeddings": 131072,
|
| 18 |
-
"mlp_bias": false,
|
| 19 |
-
"model_type": "granite",
|
| 20 |
-
"neuron": {
|
| 21 |
-
"auto_cast_type": "bf16",
|
| 22 |
-
"batch_size": 4,
|
| 23 |
-
"checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 24 |
-
"checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952",
|
| 25 |
-
"compiler_type": "neuronx-cc",
|
| 26 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
-
"num_cores": 2,
|
| 28 |
-
"sequence_length": 4096,
|
| 29 |
-
"task": "text-generation"
|
| 30 |
-
},
|
| 31 |
-
"num_attention_heads": 32,
|
| 32 |
-
"num_hidden_layers": 40,
|
| 33 |
-
"num_key_value_heads": 8,
|
| 34 |
-
"residual_multiplier": 0.22,
|
| 35 |
-
"rms_norm_eps": 1e-05,
|
| 36 |
-
"rope_scaling": null,
|
| 37 |
-
"rope_theta": 5000000.0,
|
| 38 |
-
"tie_word_embeddings": true,
|
| 39 |
-
"use_cache": true,
|
| 40 |
-
"vocab_size": 49155
|
| 41 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/e8aeb35344c4c108f038.json
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"GraniteForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.1,
|
| 10 |
-
"attention_multiplier": 0.015625,
|
| 11 |
-
"embedding_multiplier": 12.0,
|
| 12 |
-
"hidden_act": "silu",
|
| 13 |
-
"hidden_size": 2048,
|
| 14 |
-
"initializer_range": 0.02,
|
| 15 |
-
"intermediate_size": 8192,
|
| 16 |
-
"logits_scaling": 8.0,
|
| 17 |
-
"max_position_embeddings": 131072,
|
| 18 |
-
"mlp_bias": false,
|
| 19 |
-
"model_type": "granite",
|
| 20 |
-
"neuron": {
|
| 21 |
-
"auto_cast_type": "bf16",
|
| 22 |
-
"batch_size": 32,
|
| 23 |
-
"checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 24 |
-
"checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952",
|
| 25 |
-
"compiler_type": "neuronx-cc",
|
| 26 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
-
"num_cores": 8,
|
| 28 |
-
"sequence_length": 4096,
|
| 29 |
-
"task": "text-generation"
|
| 30 |
-
},
|
| 31 |
-
"num_attention_heads": 32,
|
| 32 |
-
"num_hidden_layers": 40,
|
| 33 |
-
"num_key_value_heads": 8,
|
| 34 |
-
"residual_multiplier": 0.22,
|
| 35 |
-
"rms_norm_eps": 1e-05,
|
| 36 |
-
"rope_scaling": null,
|
| 37 |
-
"rope_theta": 5000000.0,
|
| 38 |
-
"tie_word_embeddings": true,
|
| 39 |
-
"use_cache": true,
|
| 40 |
-
"vocab_size": 49155
|
| 41 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/21fede0703904ef6a5cc.json
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "ibm-granite/granite-3.1-8b-instruct",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"GraniteForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.1,
|
| 10 |
-
"attention_multiplier": 0.0078125,
|
| 11 |
-
"embedding_multiplier": 12.0,
|
| 12 |
-
"hidden_act": "silu",
|
| 13 |
-
"hidden_size": 4096,
|
| 14 |
-
"initializer_range": 0.02,
|
| 15 |
-
"intermediate_size": 12800,
|
| 16 |
-
"logits_scaling": 16.0,
|
| 17 |
-
"max_position_embeddings": 131072,
|
| 18 |
-
"mlp_bias": false,
|
| 19 |
-
"model_type": "granite",
|
| 20 |
-
"neuron": {
|
| 21 |
-
"auto_cast_type": "bf16",
|
| 22 |
-
"batch_size": 1,
|
| 23 |
-
"checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
|
| 24 |
-
"checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655",
|
| 25 |
-
"compiler_type": "neuronx-cc",
|
| 26 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
-
"num_cores": 8,
|
| 28 |
-
"sequence_length": 4096,
|
| 29 |
-
"task": "text-generation"
|
| 30 |
-
},
|
| 31 |
-
"num_attention_heads": 32,
|
| 32 |
-
"num_hidden_layers": 40,
|
| 33 |
-
"num_key_value_heads": 8,
|
| 34 |
-
"residual_multiplier": 0.22,
|
| 35 |
-
"rms_norm_eps": 1e-05,
|
| 36 |
-
"rope_scaling": null,
|
| 37 |
-
"rope_theta": 10000000.0,
|
| 38 |
-
"tie_word_embeddings": true,
|
| 39 |
-
"use_cache": true,
|
| 40 |
-
"vocab_size": 49155
|
| 41 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/45d32f74efe639efa140.json
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "ibm-granite/granite-3.1-8b-instruct",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"GraniteForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.1,
|
| 10 |
-
"attention_multiplier": 0.0078125,
|
| 11 |
-
"embedding_multiplier": 12.0,
|
| 12 |
-
"hidden_act": "silu",
|
| 13 |
-
"hidden_size": 4096,
|
| 14 |
-
"initializer_range": 0.02,
|
| 15 |
-
"intermediate_size": 12800,
|
| 16 |
-
"logits_scaling": 16.0,
|
| 17 |
-
"max_position_embeddings": 131072,
|
| 18 |
-
"mlp_bias": false,
|
| 19 |
-
"model_type": "granite",
|
| 20 |
-
"neuron": {
|
| 21 |
-
"auto_cast_type": "bf16",
|
| 22 |
-
"batch_size": 1,
|
| 23 |
-
"checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
|
| 24 |
-
"checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655",
|
| 25 |
-
"compiler_type": "neuronx-cc",
|
| 26 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
-
"num_cores": 2,
|
| 28 |
-
"sequence_length": 4096,
|
| 29 |
-
"task": "text-generation"
|
| 30 |
-
},
|
| 31 |
-
"num_attention_heads": 32,
|
| 32 |
-
"num_hidden_layers": 40,
|
| 33 |
-
"num_key_value_heads": 8,
|
| 34 |
-
"residual_multiplier": 0.22,
|
| 35 |
-
"rms_norm_eps": 1e-05,
|
| 36 |
-
"rope_scaling": null,
|
| 37 |
-
"rope_theta": 10000000.0,
|
| 38 |
-
"tie_word_embeddings": true,
|
| 39 |
-
"use_cache": true,
|
| 40 |
-
"vocab_size": 49155
|
| 41 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/5f89c0b1616392b66470.json
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "ibm-granite/granite-3.1-8b-instruct",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"GraniteForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.1,
|
| 10 |
-
"attention_multiplier": 0.0078125,
|
| 11 |
-
"embedding_multiplier": 12.0,
|
| 12 |
-
"hidden_act": "silu",
|
| 13 |
-
"hidden_size": 4096,
|
| 14 |
-
"initializer_range": 0.02,
|
| 15 |
-
"intermediate_size": 12800,
|
| 16 |
-
"logits_scaling": 16.0,
|
| 17 |
-
"max_position_embeddings": 131072,
|
| 18 |
-
"mlp_bias": false,
|
| 19 |
-
"model_type": "granite",
|
| 20 |
-
"neuron": {
|
| 21 |
-
"auto_cast_type": "bf16",
|
| 22 |
-
"batch_size": 8,
|
| 23 |
-
"checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
|
| 24 |
-
"checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655",
|
| 25 |
-
"compiler_type": "neuronx-cc",
|
| 26 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
-
"num_cores": 2,
|
| 28 |
-
"sequence_length": 4096,
|
| 29 |
-
"task": "text-generation"
|
| 30 |
-
},
|
| 31 |
-
"num_attention_heads": 32,
|
| 32 |
-
"num_hidden_layers": 40,
|
| 33 |
-
"num_key_value_heads": 8,
|
| 34 |
-
"residual_multiplier": 0.22,
|
| 35 |
-
"rms_norm_eps": 1e-05,
|
| 36 |
-
"rope_scaling": null,
|
| 37 |
-
"rope_theta": 10000000.0,
|
| 38 |
-
"tie_word_embeddings": true,
|
| 39 |
-
"use_cache": true,
|
| 40 |
-
"vocab_size": 49155
|
| 41 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/9c4fc51180f9ddea7ebd.json
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "ibm-granite/granite-3.1-8b-instruct",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"GraniteForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.1,
|
| 10 |
-
"attention_multiplier": 0.0078125,
|
| 11 |
-
"embedding_multiplier": 12.0,
|
| 12 |
-
"hidden_act": "silu",
|
| 13 |
-
"hidden_size": 4096,
|
| 14 |
-
"initializer_range": 0.02,
|
| 15 |
-
"intermediate_size": 12800,
|
| 16 |
-
"logits_scaling": 16.0,
|
| 17 |
-
"max_position_embeddings": 131072,
|
| 18 |
-
"mlp_bias": false,
|
| 19 |
-
"model_type": "granite",
|
| 20 |
-
"neuron": {
|
| 21 |
-
"auto_cast_type": "bf16",
|
| 22 |
-
"batch_size": 4,
|
| 23 |
-
"checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
|
| 24 |
-
"checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655",
|
| 25 |
-
"compiler_type": "neuronx-cc",
|
| 26 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
-
"num_cores": 2,
|
| 28 |
-
"sequence_length": 4096,
|
| 29 |
-
"task": "text-generation"
|
| 30 |
-
},
|
| 31 |
-
"num_attention_heads": 32,
|
| 32 |
-
"num_hidden_layers": 40,
|
| 33 |
-
"num_key_value_heads": 8,
|
| 34 |
-
"residual_multiplier": 0.22,
|
| 35 |
-
"rms_norm_eps": 1e-05,
|
| 36 |
-
"rope_scaling": null,
|
| 37 |
-
"rope_theta": 10000000.0,
|
| 38 |
-
"tie_word_embeddings": true,
|
| 39 |
-
"use_cache": true,
|
| 40 |
-
"vocab_size": 49155
|
| 41 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/b6f91fdddb08f318e31f.json
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "ibm-granite/granite-3.1-8b-instruct",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"GraniteForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.1,
|
| 10 |
-
"attention_multiplier": 0.0078125,
|
| 11 |
-
"embedding_multiplier": 12.0,
|
| 12 |
-
"hidden_act": "silu",
|
| 13 |
-
"hidden_size": 4096,
|
| 14 |
-
"initializer_range": 0.02,
|
| 15 |
-
"intermediate_size": 12800,
|
| 16 |
-
"logits_scaling": 16.0,
|
| 17 |
-
"max_position_embeddings": 131072,
|
| 18 |
-
"mlp_bias": false,
|
| 19 |
-
"model_type": "granite",
|
| 20 |
-
"neuron": {
|
| 21 |
-
"auto_cast_type": "bf16",
|
| 22 |
-
"batch_size": 32,
|
| 23 |
-
"checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
|
| 24 |
-
"checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655",
|
| 25 |
-
"compiler_type": "neuronx-cc",
|
| 26 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 27 |
-
"num_cores": 8,
|
| 28 |
-
"sequence_length": 4096,
|
| 29 |
-
"task": "text-generation"
|
| 30 |
-
},
|
| 31 |
-
"num_attention_heads": 32,
|
| 32 |
-
"num_hidden_layers": 40,
|
| 33 |
-
"num_key_value_heads": 8,
|
| 34 |
-
"residual_multiplier": 0.22,
|
| 35 |
-
"rms_norm_eps": 1e-05,
|
| 36 |
-
"rope_scaling": null,
|
| 37 |
-
"rope_theta": 10000000.0,
|
| 38 |
-
"tie_word_embeddings": true,
|
| 39 |
-
"use_cache": true,
|
| 40 |
-
"vocab_size": 49155
|
| 41 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/00d8fe05e3026bef5097.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 1,
|
| 21 |
-
"checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 22 |
-
"checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 2,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/1e02b59600f20e2b4809.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 16,
|
| 21 |
-
"checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 22 |
-
"checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 8,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/5fab5c4bfd6cc5c466c0.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 8,
|
| 21 |
-
"checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 22 |
-
"checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 2,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b4a848e7d155dba24978.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 8,
|
| 21 |
-
"checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 22 |
-
"checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 8,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b8d797840e56152f0045.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 32,
|
| 21 |
-
"checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 22 |
-
"checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 8,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/d23d95cf8617c46c6790.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 4,
|
| 21 |
-
"checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 22 |
-
"checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 8,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e3aa9bba36ac779dc68b.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 4,
|
| 21 |
-
"checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 22 |
-
"checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 2,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-13b-hf/15497d4569cb4aabb3a9.json
DELETED
|
@@ -1,39 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Llama-2-13b-hf",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 5120,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 13824,
|
| 15 |
-
"max_position_embeddings": 4096,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 1,
|
| 21 |
-
"checkpoint_id": "meta-llama/Llama-2-13b-hf",
|
| 22 |
-
"checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 8,
|
| 26 |
-
"sequence_length": 2048,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 40,
|
| 30 |
-
"num_hidden_layers": 40,
|
| 31 |
-
"num_key_value_heads": 40,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": null,
|
| 35 |
-
"rope_theta": 10000.0,
|
| 36 |
-
"tie_word_embeddings": false,
|
| 37 |
-
"use_cache": true,
|
| 38 |
-
"vocab_size": 32000
|
| 39 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-7b-hf/d75e02defdb0d74b4773.json
DELETED
|
@@ -1,39 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Llama-2-7b-hf",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 11008,
|
| 15 |
-
"max_position_embeddings": 4096,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 1,
|
| 21 |
-
"checkpoint_id": "meta-llama/Llama-2-7b-hf",
|
| 22 |
-
"checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 2,
|
| 26 |
-
"sequence_length": 2048,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 32,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": null,
|
| 35 |
-
"rope_theta": 10000.0,
|
| 36 |
-
"tie_word_embeddings": false,
|
| 37 |
-
"use_cache": true,
|
| 38 |
-
"vocab_size": 32000
|
| 39 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/0893aa250f27c3bca5d9.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Llama-3.1-70B-Instruct",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 8192,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 28672,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 4,
|
| 21 |
-
"checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct",
|
| 22 |
-
"checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 24,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 64,
|
| 30 |
-
"num_hidden_layers": 80,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/26f4ee07b3f4c0422285.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Llama-3.1-70B-Instruct",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 8192,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 28672,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 1,
|
| 21 |
-
"checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct",
|
| 22 |
-
"checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 24,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 64,
|
| 30 |
-
"num_hidden_layers": 80,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-1B/178748f8e86d0180fe29.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Llama-3.2-1B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 64,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 2048,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 8192,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 1,
|
| 21 |
-
"checkpoint_id": "meta-llama/Llama-3.2-1B",
|
| 22 |
-
"checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 2,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 16,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 32.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": true,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-3B/a6b80f5e9df4129d8a64.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Llama-3.2-3B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 3072,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 8192,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 1,
|
| 21 |
-
"checkpoint_id": "meta-llama/Llama-3.2-3B",
|
| 22 |
-
"checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 2,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 24,
|
| 30 |
-
"num_hidden_layers": 28,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 32.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": true,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3-8B/3f50b3c04cf531d956ff.json
DELETED
|
@@ -1,39 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Meta-Llama-3-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 8192,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 1,
|
| 21 |
-
"checkpoint_id": "meta-llama/Meta-Llama-3-8B",
|
| 22 |
-
"checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 2,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": null,
|
| 35 |
-
"rope_theta": 500000.0,
|
| 36 |
-
"tie_word_embeddings": false,
|
| 37 |
-
"use_cache": true,
|
| 38 |
-
"vocab_size": 128256
|
| 39 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/08baac4331a38cf9b5c6.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 4,
|
| 21 |
-
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 22 |
-
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 8,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/2ce651c4b0160df7b1a7.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 4,
|
| 21 |
-
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 22 |
-
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 2,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/53917ac5a736440f6651.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 8,
|
| 21 |
-
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 22 |
-
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 2,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/63ae9940e985694a6de1.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 32,
|
| 21 |
-
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 22 |
-
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 8,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/6fd804b37216317e4f8e.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 8,
|
| 21 |
-
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 22 |
-
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 8,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/8929a74a3f085f34acec.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 16,
|
| 21 |
-
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 22 |
-
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 8,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/9580f944931f95eff7e8.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 4096,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 14336,
|
| 15 |
-
"max_position_embeddings": 131072,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "bf16",
|
| 20 |
-
"batch_size": 1,
|
| 21 |
-
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
| 22 |
-
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 2,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 32,
|
| 30 |
-
"num_hidden_layers": 32,
|
| 31 |
-
"num_key_value_heads": 8,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": {
|
| 35 |
-
"factor": 8.0,
|
| 36 |
-
"high_freq_factor": 4.0,
|
| 37 |
-
"low_freq_factor": 1.0,
|
| 38 |
-
"original_max_position_embeddings": 8192,
|
| 39 |
-
"rope_type": "llama3"
|
| 40 |
-
},
|
| 41 |
-
"rope_theta": 500000.0,
|
| 42 |
-
"tie_word_embeddings": false,
|
| 43 |
-
"use_cache": true,
|
| 44 |
-
"vocab_size": 128256
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/6845a4ab255499aced61.json
DELETED
|
@@ -1,39 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "princeton-nlp/Sheared-LLaMA-1.3B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 2048,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 5504,
|
| 15 |
-
"max_position_embeddings": 4096,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "fp16",
|
| 20 |
-
"batch_size": 1,
|
| 21 |
-
"checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B",
|
| 22 |
-
"checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 2,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 16,
|
| 30 |
-
"num_hidden_layers": 24,
|
| 31 |
-
"num_key_value_heads": 16,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": null,
|
| 35 |
-
"rope_theta": 10000.0,
|
| 36 |
-
"tie_word_embeddings": false,
|
| 37 |
-
"use_cache": true,
|
| 38 |
-
"vocab_size": 32000
|
| 39 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/7ad601d64b726cbb4ba6.json
DELETED
|
@@ -1,39 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "princeton-nlp/Sheared-LLaMA-1.3B",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"LlamaForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_bias": false,
|
| 9 |
-
"attention_dropout": 0.0,
|
| 10 |
-
"head_dim": 128,
|
| 11 |
-
"hidden_act": "silu",
|
| 12 |
-
"hidden_size": 2048,
|
| 13 |
-
"initializer_range": 0.02,
|
| 14 |
-
"intermediate_size": 5504,
|
| 15 |
-
"max_position_embeddings": 4096,
|
| 16 |
-
"mlp_bias": false,
|
| 17 |
-
"model_type": "llama",
|
| 18 |
-
"neuron": {
|
| 19 |
-
"auto_cast_type": "fp16",
|
| 20 |
-
"batch_size": 4,
|
| 21 |
-
"checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B",
|
| 22 |
-
"checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4",
|
| 23 |
-
"compiler_type": "neuronx-cc",
|
| 24 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 25 |
-
"num_cores": 2,
|
| 26 |
-
"sequence_length": 4096,
|
| 27 |
-
"task": "text-generation"
|
| 28 |
-
},
|
| 29 |
-
"num_attention_heads": 16,
|
| 30 |
-
"num_hidden_layers": 24,
|
| 31 |
-
"num_key_value_heads": 16,
|
| 32 |
-
"pretraining_tp": 1,
|
| 33 |
-
"rms_norm_eps": 1e-05,
|
| 34 |
-
"rope_scaling": null,
|
| 35 |
-
"rope_theta": 10000.0,
|
| 36 |
-
"tie_word_embeddings": false,
|
| 37 |
-
"use_cache": true,
|
| 38 |
-
"vocab_size": 32000
|
| 39 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/455574355aa6f6df8272.json
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "HuggingFaceH4/zephyr-7b-beta",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"MistralForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_dropout": 0.0,
|
| 9 |
-
"head_dim": 128,
|
| 10 |
-
"hidden_act": "silu",
|
| 11 |
-
"hidden_size": 4096,
|
| 12 |
-
"initializer_range": 0.02,
|
| 13 |
-
"intermediate_size": 14336,
|
| 14 |
-
"max_position_embeddings": 32768,
|
| 15 |
-
"model_type": "mistral",
|
| 16 |
-
"neuron": {
|
| 17 |
-
"auto_cast_type": "bf16",
|
| 18 |
-
"batch_size": 16,
|
| 19 |
-
"checkpoint_id": "HuggingFaceH4/zephyr-7b-beta",
|
| 20 |
-
"checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c",
|
| 21 |
-
"compiler_type": "neuronx-cc",
|
| 22 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
-
"num_cores": 8,
|
| 24 |
-
"sequence_length": 4096,
|
| 25 |
-
"task": "text-generation"
|
| 26 |
-
},
|
| 27 |
-
"num_attention_heads": 32,
|
| 28 |
-
"num_hidden_layers": 32,
|
| 29 |
-
"num_key_value_heads": 8,
|
| 30 |
-
"rms_norm_eps": 1e-05,
|
| 31 |
-
"rope_theta": 10000.0,
|
| 32 |
-
"sliding_window": 4096,
|
| 33 |
-
"tie_word_embeddings": false,
|
| 34 |
-
"use_cache": true,
|
| 35 |
-
"vocab_size": 32000
|
| 36 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/9fcdfdf6fc4e018f496f.json
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "HuggingFaceH4/zephyr-7b-beta",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"MistralForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_dropout": 0.0,
|
| 9 |
-
"head_dim": 128,
|
| 10 |
-
"hidden_act": "silu",
|
| 11 |
-
"hidden_size": 4096,
|
| 12 |
-
"initializer_range": 0.02,
|
| 13 |
-
"intermediate_size": 14336,
|
| 14 |
-
"max_position_embeddings": 32768,
|
| 15 |
-
"model_type": "mistral",
|
| 16 |
-
"neuron": {
|
| 17 |
-
"auto_cast_type": "bf16",
|
| 18 |
-
"batch_size": 4,
|
| 19 |
-
"checkpoint_id": "HuggingFaceH4/zephyr-7b-beta",
|
| 20 |
-
"checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c",
|
| 21 |
-
"compiler_type": "neuronx-cc",
|
| 22 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
-
"num_cores": 8,
|
| 24 |
-
"sequence_length": 4096,
|
| 25 |
-
"task": "text-generation"
|
| 26 |
-
},
|
| 27 |
-
"num_attention_heads": 32,
|
| 28 |
-
"num_hidden_layers": 32,
|
| 29 |
-
"num_key_value_heads": 8,
|
| 30 |
-
"rms_norm_eps": 1e-05,
|
| 31 |
-
"rope_theta": 10000.0,
|
| 32 |
-
"sliding_window": 4096,
|
| 33 |
-
"tie_word_embeddings": false,
|
| 34 |
-
"use_cache": true,
|
| 35 |
-
"vocab_size": 32000
|
| 36 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/a3def2b08381a583fdae.json
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "HuggingFaceH4/zephyr-7b-beta",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"MistralForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_dropout": 0.0,
|
| 9 |
-
"head_dim": 128,
|
| 10 |
-
"hidden_act": "silu",
|
| 11 |
-
"hidden_size": 4096,
|
| 12 |
-
"initializer_range": 0.02,
|
| 13 |
-
"intermediate_size": 14336,
|
| 14 |
-
"max_position_embeddings": 32768,
|
| 15 |
-
"model_type": "mistral",
|
| 16 |
-
"neuron": {
|
| 17 |
-
"auto_cast_type": "bf16",
|
| 18 |
-
"batch_size": 4,
|
| 19 |
-
"checkpoint_id": "HuggingFaceH4/zephyr-7b-beta",
|
| 20 |
-
"checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c",
|
| 21 |
-
"compiler_type": "neuronx-cc",
|
| 22 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
-
"num_cores": 2,
|
| 24 |
-
"sequence_length": 4096,
|
| 25 |
-
"task": "text-generation"
|
| 26 |
-
},
|
| 27 |
-
"num_attention_heads": 32,
|
| 28 |
-
"num_hidden_layers": 32,
|
| 29 |
-
"num_key_value_heads": 8,
|
| 30 |
-
"rms_norm_eps": 1e-05,
|
| 31 |
-
"rope_theta": 10000.0,
|
| 32 |
-
"sliding_window": 4096,
|
| 33 |
-
"tie_word_embeddings": false,
|
| 34 |
-
"use_cache": true,
|
| 35 |
-
"vocab_size": 32000
|
| 36 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c37761e9cb8e4f9c854e.json
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "HuggingFaceH4/zephyr-7b-beta",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"MistralForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_dropout": 0.0,
|
| 9 |
-
"head_dim": 128,
|
| 10 |
-
"hidden_act": "silu",
|
| 11 |
-
"hidden_size": 4096,
|
| 12 |
-
"initializer_range": 0.02,
|
| 13 |
-
"intermediate_size": 14336,
|
| 14 |
-
"max_position_embeddings": 32768,
|
| 15 |
-
"model_type": "mistral",
|
| 16 |
-
"neuron": {
|
| 17 |
-
"auto_cast_type": "bf16",
|
| 18 |
-
"batch_size": 1,
|
| 19 |
-
"checkpoint_id": "HuggingFaceH4/zephyr-7b-beta",
|
| 20 |
-
"checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c",
|
| 21 |
-
"compiler_type": "neuronx-cc",
|
| 22 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
-
"num_cores": 8,
|
| 24 |
-
"sequence_length": 4096,
|
| 25 |
-
"task": "text-generation"
|
| 26 |
-
},
|
| 27 |
-
"num_attention_heads": 32,
|
| 28 |
-
"num_hidden_layers": 32,
|
| 29 |
-
"num_key_value_heads": 8,
|
| 30 |
-
"rms_norm_eps": 1e-05,
|
| 31 |
-
"rope_theta": 10000.0,
|
| 32 |
-
"sliding_window": 4096,
|
| 33 |
-
"tie_word_embeddings": false,
|
| 34 |
-
"use_cache": true,
|
| 35 |
-
"vocab_size": 32000
|
| 36 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c7b792c009f3a8f9bd58.json
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "HuggingFaceH4/zephyr-7b-beta",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"MistralForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_dropout": 0.0,
|
| 9 |
-
"head_dim": 128,
|
| 10 |
-
"hidden_act": "silu",
|
| 11 |
-
"hidden_size": 4096,
|
| 12 |
-
"initializer_range": 0.02,
|
| 13 |
-
"intermediate_size": 14336,
|
| 14 |
-
"max_position_embeddings": 32768,
|
| 15 |
-
"model_type": "mistral",
|
| 16 |
-
"neuron": {
|
| 17 |
-
"auto_cast_type": "bf16",
|
| 18 |
-
"batch_size": 8,
|
| 19 |
-
"checkpoint_id": "HuggingFaceH4/zephyr-7b-beta",
|
| 20 |
-
"checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c",
|
| 21 |
-
"compiler_type": "neuronx-cc",
|
| 22 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
-
"num_cores": 8,
|
| 24 |
-
"sequence_length": 4096,
|
| 25 |
-
"task": "text-generation"
|
| 26 |
-
},
|
| 27 |
-
"num_attention_heads": 32,
|
| 28 |
-
"num_hidden_layers": 32,
|
| 29 |
-
"num_key_value_heads": 8,
|
| 30 |
-
"rms_norm_eps": 1e-05,
|
| 31 |
-
"rope_theta": 10000.0,
|
| 32 |
-
"sliding_window": 4096,
|
| 33 |
-
"tie_word_embeddings": false,
|
| 34 |
-
"use_cache": true,
|
| 35 |
-
"vocab_size": 32000
|
| 36 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/fb824464b88d20b99ad4.json
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "HuggingFaceH4/zephyr-7b-beta",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"MistralForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_dropout": 0.0,
|
| 9 |
-
"head_dim": 128,
|
| 10 |
-
"hidden_act": "silu",
|
| 11 |
-
"hidden_size": 4096,
|
| 12 |
-
"initializer_range": 0.02,
|
| 13 |
-
"intermediate_size": 14336,
|
| 14 |
-
"max_position_embeddings": 32768,
|
| 15 |
-
"model_type": "mistral",
|
| 16 |
-
"neuron": {
|
| 17 |
-
"auto_cast_type": "bf16",
|
| 18 |
-
"batch_size": 1,
|
| 19 |
-
"checkpoint_id": "HuggingFaceH4/zephyr-7b-beta",
|
| 20 |
-
"checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c",
|
| 21 |
-
"compiler_type": "neuronx-cc",
|
| 22 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
-
"num_cores": 2,
|
| 24 |
-
"sequence_length": 4096,
|
| 25 |
-
"task": "text-generation"
|
| 26 |
-
},
|
| 27 |
-
"num_attention_heads": 32,
|
| 28 |
-
"num_hidden_layers": 32,
|
| 29 |
-
"num_key_value_heads": 8,
|
| 30 |
-
"rms_norm_eps": 1e-05,
|
| 31 |
-
"rope_theta": 10000.0,
|
| 32 |
-
"sliding_window": 4096,
|
| 33 |
-
"tie_word_embeddings": false,
|
| 34 |
-
"use_cache": true,
|
| 35 |
-
"vocab_size": 32000
|
| 36 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/13c11b9170e3b3cb8544.json
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "Intel/neural-chat-7b-v3-3",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"MistralForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_dropout": 0.0,
|
| 9 |
-
"head_dim": 128,
|
| 10 |
-
"hidden_act": "silu",
|
| 11 |
-
"hidden_size": 4096,
|
| 12 |
-
"initializer_range": 0.02,
|
| 13 |
-
"intermediate_size": 14336,
|
| 14 |
-
"max_position_embeddings": 32768,
|
| 15 |
-
"model_type": "mistral",
|
| 16 |
-
"neuron": {
|
| 17 |
-
"auto_cast_type": "fp16",
|
| 18 |
-
"batch_size": 1,
|
| 19 |
-
"checkpoint_id": "Intel/neural-chat-7b-v3-3",
|
| 20 |
-
"checkpoint_revision": "7506dfc5fb325a8a8e0c4f9a6a001671833e5b8e",
|
| 21 |
-
"compiler_type": "neuronx-cc",
|
| 22 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
-
"num_cores": 2,
|
| 24 |
-
"sequence_length": 4096,
|
| 25 |
-
"task": "text-generation"
|
| 26 |
-
},
|
| 27 |
-
"num_attention_heads": 32,
|
| 28 |
-
"num_hidden_layers": 32,
|
| 29 |
-
"num_key_value_heads": 8,
|
| 30 |
-
"rms_norm_eps": 1e-05,
|
| 31 |
-
"rope_theta": 10000.0,
|
| 32 |
-
"sliding_window": 4096,
|
| 33 |
-
"tie_word_embeddings": false,
|
| 34 |
-
"use_cache": true,
|
| 35 |
-
"vocab_size": 32000
|
| 36 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/c0fe68f25c283c1998b1.json
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "Intel/neural-chat-7b-v3-3",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"MistralForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_dropout": 0.0,
|
| 9 |
-
"head_dim": 128,
|
| 10 |
-
"hidden_act": "silu",
|
| 11 |
-
"hidden_size": 4096,
|
| 12 |
-
"initializer_range": 0.02,
|
| 13 |
-
"intermediate_size": 14336,
|
| 14 |
-
"max_position_embeddings": 32768,
|
| 15 |
-
"model_type": "mistral",
|
| 16 |
-
"neuron": {
|
| 17 |
-
"auto_cast_type": "bf16",
|
| 18 |
-
"batch_size": 4,
|
| 19 |
-
"checkpoint_id": "Intel/neural-chat-7b-v3-3",
|
| 20 |
-
"checkpoint_revision": "7506dfc5fb325a8a8e0c4f9a6a001671833e5b8e",
|
| 21 |
-
"compiler_type": "neuronx-cc",
|
| 22 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
-
"num_cores": 2,
|
| 24 |
-
"sequence_length": 4096,
|
| 25 |
-
"task": "text-generation"
|
| 26 |
-
},
|
| 27 |
-
"num_attention_heads": 32,
|
| 28 |
-
"num_hidden_layers": 32,
|
| 29 |
-
"num_key_value_heads": 8,
|
| 30 |
-
"rms_norm_eps": 1e-05,
|
| 31 |
-
"rope_theta": 10000.0,
|
| 32 |
-
"sliding_window": 4096,
|
| 33 |
-
"tie_word_embeddings": false,
|
| 34 |
-
"use_cache": true,
|
| 35 |
-
"vocab_size": 32000
|
| 36 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/be253ef73d692f0acdde.json
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "mistralai/Mistral-7B-Instruct-v0.1",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"MistralForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_dropout": 0.0,
|
| 9 |
-
"head_dim": 128,
|
| 10 |
-
"hidden_act": "silu",
|
| 11 |
-
"hidden_size": 4096,
|
| 12 |
-
"initializer_range": 0.02,
|
| 13 |
-
"intermediate_size": 14336,
|
| 14 |
-
"max_position_embeddings": 32768,
|
| 15 |
-
"model_type": "mistral",
|
| 16 |
-
"neuron": {
|
| 17 |
-
"auto_cast_type": "bf16",
|
| 18 |
-
"batch_size": 1,
|
| 19 |
-
"checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.1",
|
| 20 |
-
"checkpoint_revision": "2dcff66eac0c01dc50e4c41eea959968232187fe",
|
| 21 |
-
"compiler_type": "neuronx-cc",
|
| 22 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
-
"num_cores": 2,
|
| 24 |
-
"sequence_length": 4096,
|
| 25 |
-
"task": "text-generation"
|
| 26 |
-
},
|
| 27 |
-
"num_attention_heads": 32,
|
| 28 |
-
"num_hidden_layers": 32,
|
| 29 |
-
"num_key_value_heads": 8,
|
| 30 |
-
"rms_norm_eps": 1e-05,
|
| 31 |
-
"rope_theta": 10000.0,
|
| 32 |
-
"sliding_window": 4096,
|
| 33 |
-
"tie_word_embeddings": false,
|
| 34 |
-
"use_cache": true,
|
| 35 |
-
"vocab_size": 32000
|
| 36 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ab22dc02d25b1a888451.json
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"MistralForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_dropout": 0.0,
|
| 9 |
-
"head_dim": 128,
|
| 10 |
-
"hidden_act": "silu",
|
| 11 |
-
"hidden_size": 4096,
|
| 12 |
-
"initializer_range": 0.02,
|
| 13 |
-
"intermediate_size": 14336,
|
| 14 |
-
"max_position_embeddings": 32768,
|
| 15 |
-
"model_type": "mistral",
|
| 16 |
-
"neuron": {
|
| 17 |
-
"auto_cast_type": "bf16",
|
| 18 |
-
"batch_size": 1,
|
| 19 |
-
"checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 20 |
-
"checkpoint_revision": "3ad372fc79158a2148299e3318516c786aeded6c",
|
| 21 |
-
"compiler_type": "neuronx-cc",
|
| 22 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
-
"num_cores": 2,
|
| 24 |
-
"sequence_length": 4096,
|
| 25 |
-
"task": "text-generation"
|
| 26 |
-
},
|
| 27 |
-
"num_attention_heads": 32,
|
| 28 |
-
"num_hidden_layers": 32,
|
| 29 |
-
"num_key_value_heads": 8,
|
| 30 |
-
"rms_norm_eps": 1e-05,
|
| 31 |
-
"rope_theta": 1000000.0,
|
| 32 |
-
"sliding_window": null,
|
| 33 |
-
"tie_word_embeddings": false,
|
| 34 |
-
"use_cache": true,
|
| 35 |
-
"vocab_size": 32000
|
| 36 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/06ed075b59d7dee23809.json
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
-
"_model_id": "mistralai/Mistral-7B-Instruct-v0.3",
|
| 4 |
-
"_task": "text-generation",
|
| 5 |
-
"architectures": [
|
| 6 |
-
"MistralForCausalLM"
|
| 7 |
-
],
|
| 8 |
-
"attention_dropout": 0.0,
|
| 9 |
-
"head_dim": 128,
|
| 10 |
-
"hidden_act": "silu",
|
| 11 |
-
"hidden_size": 4096,
|
| 12 |
-
"initializer_range": 0.02,
|
| 13 |
-
"intermediate_size": 14336,
|
| 14 |
-
"max_position_embeddings": 32768,
|
| 15 |
-
"model_type": "mistral",
|
| 16 |
-
"neuron": {
|
| 17 |
-
"auto_cast_type": "bf16",
|
| 18 |
-
"batch_size": 1,
|
| 19 |
-
"checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.3",
|
| 20 |
-
"checkpoint_revision": "e0bc86c23ce5aae1db576c8cca6f06f1f73af2db",
|
| 21 |
-
"compiler_type": "neuronx-cc",
|
| 22 |
-
"compiler_version": "2.17.194.0+d312836f",
|
| 23 |
-
"num_cores": 8,
|
| 24 |
-
"sequence_length": 4096,
|
| 25 |
-
"task": "text-generation"
|
| 26 |
-
},
|
| 27 |
-
"num_attention_heads": 32,
|
| 28 |
-
"num_hidden_layers": 32,
|
| 29 |
-
"num_key_value_heads": 8,
|
| 30 |
-
"rms_norm_eps": 1e-05,
|
| 31 |
-
"rope_theta": 1000000.0,
|
| 32 |
-
"sliding_window": null,
|
| 33 |
-
"tie_word_embeddings": false,
|
| 34 |
-
"use_cache": true,
|
| 35 |
-
"vocab_size": 32768
|
| 36 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|