dacorvo HF Staff committed on
Commit
aae4a39
·
verified ·
1 Parent(s): 473aa53

Removing all cached artifacts prior to 0.3.0

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/b391179f2d4f2cc7a404.json +0 -1
  2. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/meta-llama/Llama-3.1-8B-Instruct/a4e0275af090ae00d0f4.json +0 -1
  3. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.28/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/8a31499781b39a1babbc.json +0 -1
  4. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/7353b00efd1c2cf456a5.json +0 -48
  5. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/dfdea472b85b5e1c1bc0.json +0 -48
  6. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/ecd6582c85ac47fe17d8.json +0 -48
  7. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/23c029b3504f98db0dae.json +0 -41
  8. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/245ec3826de5d0d8c4f3.json +0 -41
  9. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9adf62282ade09a4ee97.json +0 -41
  10. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9e7df2d93403be65f917.json +0 -41
  11. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/e8aeb35344c4c108f038.json +0 -41
  12. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/21fede0703904ef6a5cc.json +0 -41
  13. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/45d32f74efe639efa140.json +0 -41
  14. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/5f89c0b1616392b66470.json +0 -41
  15. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/9c4fc51180f9ddea7ebd.json +0 -41
  16. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/b6f91fdddb08f318e31f.json +0 -41
  17. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/00d8fe05e3026bef5097.json +0 -45
  18. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/1e02b59600f20e2b4809.json +0 -45
  19. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/5fab5c4bfd6cc5c466c0.json +0 -45
  20. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b4a848e7d155dba24978.json +0 -45
  21. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b8d797840e56152f0045.json +0 -45
  22. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/d23d95cf8617c46c6790.json +0 -45
  23. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e3aa9bba36ac779dc68b.json +0 -45
  24. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-13b-hf/15497d4569cb4aabb3a9.json +0 -39
  25. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-7b-hf/d75e02defdb0d74b4773.json +0 -39
  26. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/0893aa250f27c3bca5d9.json +0 -45
  27. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/26f4ee07b3f4c0422285.json +0 -45
  28. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-1B/178748f8e86d0180fe29.json +0 -45
  29. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-3B/a6b80f5e9df4129d8a64.json +0 -45
  30. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3-8B/3f50b3c04cf531d956ff.json +0 -39
  31. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/08baac4331a38cf9b5c6.json +0 -45
  32. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/2ce651c4b0160df7b1a7.json +0 -45
  33. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/53917ac5a736440f6651.json +0 -45
  34. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/63ae9940e985694a6de1.json +0 -45
  35. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/6fd804b37216317e4f8e.json +0 -45
  36. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/8929a74a3f085f34acec.json +0 -45
  37. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/9580f944931f95eff7e8.json +0 -45
  38. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/6845a4ab255499aced61.json +0 -39
  39. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/7ad601d64b726cbb4ba6.json +0 -39
  40. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/455574355aa6f6df8272.json +0 -36
  41. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/9fcdfdf6fc4e018f496f.json +0 -36
  42. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/a3def2b08381a583fdae.json +0 -36
  43. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c37761e9cb8e4f9c854e.json +0 -36
  44. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c7b792c009f3a8f9bd58.json +0 -36
  45. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/fb824464b88d20b99ad4.json +0 -36
  46. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/13c11b9170e3b3cb8544.json +0 -36
  47. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/c0fe68f25c283c1998b1.json +0 -36
  48. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/be253ef73d692f0acdde.json +0 -36
  49. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ab22dc02d25b1a888451.json +0 -36
  50. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/06ed075b59d7dee23809.json +0 -36
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/b391179f2d4f2cc7a404.json DELETED
@@ -1 +0,0 @@
1
- {"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 5632, "max_position_embeddings": 2048, "mlp_bias": false, "model_type": "llama", "neuron": {"compiler_version": "2.17.194.0+d312836f", "input_specs": {"attention_mask": [2, 1024], "input_ids": [2, 1024], "labels": [2, 1024]}, "model_class": "PeftModelForCausalLM", "num_neuron_cores_per_node": 2, "pipeline_parallel_size": 1, "precision": "bfloat16", "tensor_parallel_size": 2, "training": true}, "num_attention_heads": 32, "num_hidden_layers": 22, "num_key_value_heads": 4, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.27/training/llama/meta-llama/Llama-3.1-8B-Instruct/a4e0275af090ae00d0f4.json DELETED
@@ -1 +0,0 @@
1
- {"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": [128001, 128008, 128009], "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "llama", "neuron": {"compiler_version": "2.17.194.0+d312836f", "input_specs": {"attention_mask": [2, 1024], "input_ids": [2, 1024], "labels": [2, 1024]}, "model_class": "PeftModelForCausalLM", "num_neuron_cores_per_node": 32, "pipeline_parallel_size": 1, "precision": "bfloat16", "tensor_parallel_size": 8, "training": true}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": {"factor": 8.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 128256}
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.0.28/training/llama/TinyLlama/TinyLlama-1.1B-Chat-v1.0/8a31499781b39a1babbc.json DELETED
@@ -1 +0,0 @@
1
- {"_attn_implementation_autoset": true, "architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 5632, "max_position_embeddings": 2048, "mlp_bias": false, "model_type": "llama", "neuron": {"compiler_version": "2.17.194.0+d312836f", "input_specs": {"attention_mask": [2, 1024], "input_ids": [2, 1024], "labels": [2, 1024]}, "model_class": "PeftModelForCausalLM", "num_neuron_cores_per_node": 2, "pipeline_parallel_size": 1, "precision": "bfloat16", "tensor_parallel_size": 2, "training": true}, "num_attention_heads": 32, "num_hidden_layers": 22, "num_key_value_heads": 4, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/7353b00efd1c2cf456a5.json DELETED
@@ -1,48 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "openai-community/gpt2",
4
- "_task": "text-generation",
5
- "activation_function": "gelu_new",
6
- "architectures": [
7
- "GPT2LMHeadModel"
8
- ],
9
- "attn_pdrop": 0.1,
10
- "embd_pdrop": 0.1,
11
- "initializer_range": 0.02,
12
- "layer_norm_epsilon": 1e-05,
13
- "model_type": "gpt2",
14
- "n_ctx": 1024,
15
- "n_embd": 768,
16
- "n_head": 12,
17
- "n_inner": null,
18
- "n_layer": 12,
19
- "n_positions": 1024,
20
- "neuron": {
21
- "auto_cast_type": "fp16",
22
- "batch_size": 16,
23
- "checkpoint_id": "openai-community/gpt2",
24
- "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e",
25
- "compiler_type": "neuronx-cc",
26
- "compiler_version": "2.17.194.0+d312836f",
27
- "num_cores": 2,
28
- "sequence_length": 1024,
29
- "task": "text-generation"
30
- },
31
- "reorder_and_upcast_attn": false,
32
- "resid_pdrop": 0.1,
33
- "scale_attn_by_inverse_layer_idx": false,
34
- "scale_attn_weights": true,
35
- "summary_activation": null,
36
- "summary_first_dropout": 0.1,
37
- "summary_proj_to_labels": true,
38
- "summary_type": "cls_index",
39
- "summary_use_proj": true,
40
- "task_specific_params": {
41
- "text-generation": {
42
- "do_sample": true,
43
- "max_length": 50
44
- }
45
- },
46
- "use_cache": true,
47
- "vocab_size": 50257
48
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/dfdea472b85b5e1c1bc0.json DELETED
@@ -1,48 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "openai-community/gpt2",
4
- "_task": "text-generation",
5
- "activation_function": "gelu_new",
6
- "architectures": [
7
- "GPT2LMHeadModel"
8
- ],
9
- "attn_pdrop": 0.1,
10
- "embd_pdrop": 0.1,
11
- "initializer_range": 0.02,
12
- "layer_norm_epsilon": 1e-05,
13
- "model_type": "gpt2",
14
- "n_ctx": 1024,
15
- "n_embd": 768,
16
- "n_head": 12,
17
- "n_inner": null,
18
- "n_layer": 12,
19
- "n_positions": 1024,
20
- "neuron": {
21
- "auto_cast_type": "fp16",
22
- "batch_size": 4,
23
- "checkpoint_id": "openai-community/gpt2",
24
- "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e",
25
- "compiler_type": "neuronx-cc",
26
- "compiler_version": "2.17.194.0+d312836f",
27
- "num_cores": 2,
28
- "sequence_length": 1024,
29
- "task": "text-generation"
30
- },
31
- "reorder_and_upcast_attn": false,
32
- "resid_pdrop": 0.1,
33
- "scale_attn_by_inverse_layer_idx": false,
34
- "scale_attn_weights": true,
35
- "summary_activation": null,
36
- "summary_first_dropout": 0.1,
37
- "summary_proj_to_labels": true,
38
- "summary_type": "cls_index",
39
- "summary_use_proj": true,
40
- "task_specific_params": {
41
- "text-generation": {
42
- "do_sample": true,
43
- "max_length": 50
44
- }
45
- },
46
- "use_cache": true,
47
- "vocab_size": 50257
48
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/gpt2/openai-community/gpt2/ecd6582c85ac47fe17d8.json DELETED
@@ -1,48 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "openai-community/gpt2",
4
- "_task": "text-generation",
5
- "activation_function": "gelu_new",
6
- "architectures": [
7
- "GPT2LMHeadModel"
8
- ],
9
- "attn_pdrop": 0.1,
10
- "embd_pdrop": 0.1,
11
- "initializer_range": 0.02,
12
- "layer_norm_epsilon": 1e-05,
13
- "model_type": "gpt2",
14
- "n_ctx": 1024,
15
- "n_embd": 768,
16
- "n_head": 12,
17
- "n_inner": null,
18
- "n_layer": 12,
19
- "n_positions": 1024,
20
- "neuron": {
21
- "auto_cast_type": "fp16",
22
- "batch_size": 1,
23
- "checkpoint_id": "openai-community/gpt2",
24
- "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e",
25
- "compiler_type": "neuronx-cc",
26
- "compiler_version": "2.17.194.0+d312836f",
27
- "num_cores": 2,
28
- "sequence_length": 1024,
29
- "task": "text-generation"
30
- },
31
- "reorder_and_upcast_attn": false,
32
- "resid_pdrop": 0.1,
33
- "scale_attn_by_inverse_layer_idx": false,
34
- "scale_attn_weights": true,
35
- "summary_activation": null,
36
- "summary_first_dropout": 0.1,
37
- "summary_proj_to_labels": true,
38
- "summary_type": "cls_index",
39
- "summary_use_proj": true,
40
- "task_specific_params": {
41
- "text-generation": {
42
- "do_sample": true,
43
- "max_length": 50
44
- }
45
- },
46
- "use_cache": true,
47
- "vocab_size": 50257
48
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/23c029b3504f98db0dae.json DELETED
@@ -1,41 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "GraniteForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.1,
10
- "attention_multiplier": 0.015625,
11
- "embedding_multiplier": 12.0,
12
- "hidden_act": "silu",
13
- "hidden_size": 2048,
14
- "initializer_range": 0.02,
15
- "intermediate_size": 8192,
16
- "logits_scaling": 8.0,
17
- "max_position_embeddings": 131072,
18
- "mlp_bias": false,
19
- "model_type": "granite",
20
- "neuron": {
21
- "auto_cast_type": "bf16",
22
- "batch_size": 1,
23
- "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
24
- "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952",
25
- "compiler_type": "neuronx-cc",
26
- "compiler_version": "2.17.194.0+d312836f",
27
- "num_cores": 2,
28
- "sequence_length": 4096,
29
- "task": "text-generation"
30
- },
31
- "num_attention_heads": 32,
32
- "num_hidden_layers": 40,
33
- "num_key_value_heads": 8,
34
- "residual_multiplier": 0.22,
35
- "rms_norm_eps": 1e-05,
36
- "rope_scaling": null,
37
- "rope_theta": 5000000.0,
38
- "tie_word_embeddings": true,
39
- "use_cache": true,
40
- "vocab_size": 49155
41
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/245ec3826de5d0d8c4f3.json DELETED
@@ -1,41 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "GraniteForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.1,
10
- "attention_multiplier": 0.015625,
11
- "embedding_multiplier": 12.0,
12
- "hidden_act": "silu",
13
- "hidden_size": 2048,
14
- "initializer_range": 0.02,
15
- "intermediate_size": 8192,
16
- "logits_scaling": 8.0,
17
- "max_position_embeddings": 131072,
18
- "mlp_bias": false,
19
- "model_type": "granite",
20
- "neuron": {
21
- "auto_cast_type": "bf16",
22
- "batch_size": 8,
23
- "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
24
- "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952",
25
- "compiler_type": "neuronx-cc",
26
- "compiler_version": "2.17.194.0+d312836f",
27
- "num_cores": 2,
28
- "sequence_length": 4096,
29
- "task": "text-generation"
30
- },
31
- "num_attention_heads": 32,
32
- "num_hidden_layers": 40,
33
- "num_key_value_heads": 8,
34
- "residual_multiplier": 0.22,
35
- "rms_norm_eps": 1e-05,
36
- "rope_scaling": null,
37
- "rope_theta": 5000000.0,
38
- "tie_word_embeddings": true,
39
- "use_cache": true,
40
- "vocab_size": 49155
41
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9adf62282ade09a4ee97.json DELETED
@@ -1,41 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "GraniteForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.1,
10
- "attention_multiplier": 0.015625,
11
- "embedding_multiplier": 12.0,
12
- "hidden_act": "silu",
13
- "hidden_size": 2048,
14
- "initializer_range": 0.02,
15
- "intermediate_size": 8192,
16
- "logits_scaling": 8.0,
17
- "max_position_embeddings": 131072,
18
- "mlp_bias": false,
19
- "model_type": "granite",
20
- "neuron": {
21
- "auto_cast_type": "bf16",
22
- "batch_size": 1,
23
- "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
24
- "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952",
25
- "compiler_type": "neuronx-cc",
26
- "compiler_version": "2.17.194.0+d312836f",
27
- "num_cores": 8,
28
- "sequence_length": 4096,
29
- "task": "text-generation"
30
- },
31
- "num_attention_heads": 32,
32
- "num_hidden_layers": 40,
33
- "num_key_value_heads": 8,
34
- "residual_multiplier": 0.22,
35
- "rms_norm_eps": 1e-05,
36
- "rope_scaling": null,
37
- "rope_theta": 5000000.0,
38
- "tie_word_embeddings": true,
39
- "use_cache": true,
40
- "vocab_size": 49155
41
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/9e7df2d93403be65f917.json DELETED
@@ -1,41 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "GraniteForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.1,
10
- "attention_multiplier": 0.015625,
11
- "embedding_multiplier": 12.0,
12
- "hidden_act": "silu",
13
- "hidden_size": 2048,
14
- "initializer_range": 0.02,
15
- "intermediate_size": 8192,
16
- "logits_scaling": 8.0,
17
- "max_position_embeddings": 131072,
18
- "mlp_bias": false,
19
- "model_type": "granite",
20
- "neuron": {
21
- "auto_cast_type": "bf16",
22
- "batch_size": 4,
23
- "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
24
- "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952",
25
- "compiler_type": "neuronx-cc",
26
- "compiler_version": "2.17.194.0+d312836f",
27
- "num_cores": 2,
28
- "sequence_length": 4096,
29
- "task": "text-generation"
30
- },
31
- "num_attention_heads": 32,
32
- "num_hidden_layers": 40,
33
- "num_key_value_heads": 8,
34
- "residual_multiplier": 0.22,
35
- "rms_norm_eps": 1e-05,
36
- "rope_scaling": null,
37
- "rope_theta": 5000000.0,
38
- "tie_word_embeddings": true,
39
- "use_cache": true,
40
- "vocab_size": 49155
41
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-2b-instruct/e8aeb35344c4c108f038.json DELETED
@@ -1,41 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "GraniteForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.1,
10
- "attention_multiplier": 0.015625,
11
- "embedding_multiplier": 12.0,
12
- "hidden_act": "silu",
13
- "hidden_size": 2048,
14
- "initializer_range": 0.02,
15
- "intermediate_size": 8192,
16
- "logits_scaling": 8.0,
17
- "max_position_embeddings": 131072,
18
- "mlp_bias": false,
19
- "model_type": "granite",
20
- "neuron": {
21
- "auto_cast_type": "bf16",
22
- "batch_size": 32,
23
- "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
24
- "checkpoint_revision": "374ef54e020a3ce208c65e96d6213922a87d8952",
25
- "compiler_type": "neuronx-cc",
26
- "compiler_version": "2.17.194.0+d312836f",
27
- "num_cores": 8,
28
- "sequence_length": 4096,
29
- "task": "text-generation"
30
- },
31
- "num_attention_heads": 32,
32
- "num_hidden_layers": 40,
33
- "num_key_value_heads": 8,
34
- "residual_multiplier": 0.22,
35
- "rms_norm_eps": 1e-05,
36
- "rope_scaling": null,
37
- "rope_theta": 5000000.0,
38
- "tie_word_embeddings": true,
39
- "use_cache": true,
40
- "vocab_size": 49155
41
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/21fede0703904ef6a5cc.json DELETED
@@ -1,41 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "ibm-granite/granite-3.1-8b-instruct",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "GraniteForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.1,
10
- "attention_multiplier": 0.0078125,
11
- "embedding_multiplier": 12.0,
12
- "hidden_act": "silu",
13
- "hidden_size": 4096,
14
- "initializer_range": 0.02,
15
- "intermediate_size": 12800,
16
- "logits_scaling": 16.0,
17
- "max_position_embeddings": 131072,
18
- "mlp_bias": false,
19
- "model_type": "granite",
20
- "neuron": {
21
- "auto_cast_type": "bf16",
22
- "batch_size": 1,
23
- "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
24
- "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655",
25
- "compiler_type": "neuronx-cc",
26
- "compiler_version": "2.17.194.0+d312836f",
27
- "num_cores": 8,
28
- "sequence_length": 4096,
29
- "task": "text-generation"
30
- },
31
- "num_attention_heads": 32,
32
- "num_hidden_layers": 40,
33
- "num_key_value_heads": 8,
34
- "residual_multiplier": 0.22,
35
- "rms_norm_eps": 1e-05,
36
- "rope_scaling": null,
37
- "rope_theta": 10000000.0,
38
- "tie_word_embeddings": true,
39
- "use_cache": true,
40
- "vocab_size": 49155
41
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/45d32f74efe639efa140.json DELETED
@@ -1,41 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "ibm-granite/granite-3.1-8b-instruct",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "GraniteForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.1,
10
- "attention_multiplier": 0.0078125,
11
- "embedding_multiplier": 12.0,
12
- "hidden_act": "silu",
13
- "hidden_size": 4096,
14
- "initializer_range": 0.02,
15
- "intermediate_size": 12800,
16
- "logits_scaling": 16.0,
17
- "max_position_embeddings": 131072,
18
- "mlp_bias": false,
19
- "model_type": "granite",
20
- "neuron": {
21
- "auto_cast_type": "bf16",
22
- "batch_size": 1,
23
- "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
24
- "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655",
25
- "compiler_type": "neuronx-cc",
26
- "compiler_version": "2.17.194.0+d312836f",
27
- "num_cores": 2,
28
- "sequence_length": 4096,
29
- "task": "text-generation"
30
- },
31
- "num_attention_heads": 32,
32
- "num_hidden_layers": 40,
33
- "num_key_value_heads": 8,
34
- "residual_multiplier": 0.22,
35
- "rms_norm_eps": 1e-05,
36
- "rope_scaling": null,
37
- "rope_theta": 10000000.0,
38
- "tie_word_embeddings": true,
39
- "use_cache": true,
40
- "vocab_size": 49155
41
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/5f89c0b1616392b66470.json DELETED
@@ -1,41 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "ibm-granite/granite-3.1-8b-instruct",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "GraniteForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.1,
10
- "attention_multiplier": 0.0078125,
11
- "embedding_multiplier": 12.0,
12
- "hidden_act": "silu",
13
- "hidden_size": 4096,
14
- "initializer_range": 0.02,
15
- "intermediate_size": 12800,
16
- "logits_scaling": 16.0,
17
- "max_position_embeddings": 131072,
18
- "mlp_bias": false,
19
- "model_type": "granite",
20
- "neuron": {
21
- "auto_cast_type": "bf16",
22
- "batch_size": 8,
23
- "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
24
- "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655",
25
- "compiler_type": "neuronx-cc",
26
- "compiler_version": "2.17.194.0+d312836f",
27
- "num_cores": 2,
28
- "sequence_length": 4096,
29
- "task": "text-generation"
30
- },
31
- "num_attention_heads": 32,
32
- "num_hidden_layers": 40,
33
- "num_key_value_heads": 8,
34
- "residual_multiplier": 0.22,
35
- "rms_norm_eps": 1e-05,
36
- "rope_scaling": null,
37
- "rope_theta": 10000000.0,
38
- "tie_word_embeddings": true,
39
- "use_cache": true,
40
- "vocab_size": 49155
41
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/9c4fc51180f9ddea7ebd.json DELETED
@@ -1,41 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "ibm-granite/granite-3.1-8b-instruct",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "GraniteForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.1,
10
- "attention_multiplier": 0.0078125,
11
- "embedding_multiplier": 12.0,
12
- "hidden_act": "silu",
13
- "hidden_size": 4096,
14
- "initializer_range": 0.02,
15
- "intermediate_size": 12800,
16
- "logits_scaling": 16.0,
17
- "max_position_embeddings": 131072,
18
- "mlp_bias": false,
19
- "model_type": "granite",
20
- "neuron": {
21
- "auto_cast_type": "bf16",
22
- "batch_size": 4,
23
- "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
24
- "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655",
25
- "compiler_type": "neuronx-cc",
26
- "compiler_version": "2.17.194.0+d312836f",
27
- "num_cores": 2,
28
- "sequence_length": 4096,
29
- "task": "text-generation"
30
- },
31
- "num_attention_heads": 32,
32
- "num_hidden_layers": 40,
33
- "num_key_value_heads": 8,
34
- "residual_multiplier": 0.22,
35
- "rms_norm_eps": 1e-05,
36
- "rope_scaling": null,
37
- "rope_theta": 10000000.0,
38
- "tie_word_embeddings": true,
39
- "use_cache": true,
40
- "vocab_size": 49155
41
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/granite/ibm-granite/granite-3.1-8b-instruct/b6f91fdddb08f318e31f.json DELETED
@@ -1,41 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "ibm-granite/granite-3.1-8b-instruct",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "GraniteForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.1,
10
- "attention_multiplier": 0.0078125,
11
- "embedding_multiplier": 12.0,
12
- "hidden_act": "silu",
13
- "hidden_size": 4096,
14
- "initializer_range": 0.02,
15
- "intermediate_size": 12800,
16
- "logits_scaling": 16.0,
17
- "max_position_embeddings": 131072,
18
- "mlp_bias": false,
19
- "model_type": "granite",
20
- "neuron": {
21
- "auto_cast_type": "bf16",
22
- "batch_size": 32,
23
- "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
24
- "checkpoint_revision": "3f05a1d007b2484bbf17593efe110bd5b9d67655",
25
- "compiler_type": "neuronx-cc",
26
- "compiler_version": "2.17.194.0+d312836f",
27
- "num_cores": 8,
28
- "sequence_length": 4096,
29
- "task": "text-generation"
30
- },
31
- "num_attention_heads": 32,
32
- "num_hidden_layers": 40,
33
- "num_key_value_heads": 8,
34
- "residual_multiplier": 0.22,
35
- "rms_norm_eps": 1e-05,
36
- "rope_scaling": null,
37
- "rope_theta": 10000000.0,
38
- "tie_word_embeddings": true,
39
- "use_cache": true,
40
- "vocab_size": 49155
41
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/00d8fe05e3026bef5097.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 1,
21
- "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
22
- "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 2,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/1e02b59600f20e2b4809.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 16,
21
- "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
22
- "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 8,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/5fab5c4bfd6cc5c466c0.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 8,
21
- "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
22
- "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 2,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b4a848e7d155dba24978.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 8,
21
- "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
22
- "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 8,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/b8d797840e56152f0045.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 32,
21
- "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
22
- "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 8,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/d23d95cf8617c46c6790.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 4,
21
- "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
22
- "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 8,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/e3aa9bba36ac779dc68b.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 4,
21
- "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
22
- "checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 2,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-13b-hf/15497d4569cb4aabb3a9.json DELETED
@@ -1,39 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Llama-2-13b-hf",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 5120,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 13824,
15
- "max_position_embeddings": 4096,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 1,
21
- "checkpoint_id": "meta-llama/Llama-2-13b-hf",
22
- "checkpoint_revision": "5c31dfb671ce7cfe2d7bb7c04375e44c55e815b1",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 8,
26
- "sequence_length": 2048,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 40,
30
- "num_hidden_layers": 40,
31
- "num_key_value_heads": 40,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": null,
35
- "rope_theta": 10000.0,
36
- "tie_word_embeddings": false,
37
- "use_cache": true,
38
- "vocab_size": 32000
39
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-2-7b-hf/d75e02defdb0d74b4773.json DELETED
@@ -1,39 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Llama-2-7b-hf",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 11008,
15
- "max_position_embeddings": 4096,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 1,
21
- "checkpoint_id": "meta-llama/Llama-2-7b-hf",
22
- "checkpoint_revision": "01c7f73d771dfac7d292323805ebc428287df4f9",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 2,
26
- "sequence_length": 2048,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 32,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": null,
35
- "rope_theta": 10000.0,
36
- "tie_word_embeddings": false,
37
- "use_cache": true,
38
- "vocab_size": 32000
39
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/0893aa250f27c3bca5d9.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Llama-3.1-70B-Instruct",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 8192,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 28672,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 4,
21
- "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct",
22
- "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 24,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 64,
30
- "num_hidden_layers": 80,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.1-70B-Instruct/26f4ee07b3f4c0422285.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Llama-3.1-70B-Instruct",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 8192,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 28672,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 1,
21
- "checkpoint_id": "meta-llama/Llama-3.1-70B-Instruct",
22
- "checkpoint_revision": "1605565b47bb9346c5515c34102e054115b4f98b",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 24,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 64,
30
- "num_hidden_layers": 80,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-1B/178748f8e86d0180fe29.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Llama-3.2-1B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 64,
11
- "hidden_act": "silu",
12
- "hidden_size": 2048,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 8192,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 1,
21
- "checkpoint_id": "meta-llama/Llama-3.2-1B",
22
- "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 2,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 16,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 32.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": true,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Llama-3.2-3B/a6b80f5e9df4129d8a64.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Llama-3.2-3B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 3072,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 8192,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 1,
21
- "checkpoint_id": "meta-llama/Llama-3.2-3B",
22
- "checkpoint_revision": "13afe5124825b4f3751f836b40dafda64c1ed062",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 2,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 24,
30
- "num_hidden_layers": 28,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 32.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": true,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3-8B/3f50b3c04cf531d956ff.json DELETED
@@ -1,39 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Meta-Llama-3-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 8192,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 1,
21
- "checkpoint_id": "meta-llama/Meta-Llama-3-8B",
22
- "checkpoint_revision": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 2,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": null,
35
- "rope_theta": 500000.0,
36
- "tie_word_embeddings": false,
37
- "use_cache": true,
38
- "vocab_size": 128256
39
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/08baac4331a38cf9b5c6.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 4,
21
- "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
22
- "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 8,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/2ce651c4b0160df7b1a7.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 4,
21
- "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
22
- "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 2,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/53917ac5a736440f6651.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 8,
21
- "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
22
- "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 2,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/63ae9940e985694a6de1.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 32,
21
- "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
22
- "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 8,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/6fd804b37216317e4f8e.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 8,
21
- "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
22
- "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 8,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/8929a74a3f085f34acec.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 16,
21
- "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
22
- "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 8,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/meta-llama/Meta-Llama-3.1-8B/9580f944931f95eff7e8.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 4096,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 14336,
15
- "max_position_embeddings": 131072,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "bf16",
20
- "batch_size": 1,
21
- "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
22
- "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 2,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 32,
30
- "num_hidden_layers": 32,
31
- "num_key_value_heads": 8,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "factor": 8.0,
36
- "high_freq_factor": 4.0,
37
- "low_freq_factor": 1.0,
38
- "original_max_position_embeddings": 8192,
39
- "rope_type": "llama3"
40
- },
41
- "rope_theta": 500000.0,
42
- "tie_word_embeddings": false,
43
- "use_cache": true,
44
- "vocab_size": 128256
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/6845a4ab255499aced61.json DELETED
@@ -1,39 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 2048,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 5504,
15
- "max_position_embeddings": 4096,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "fp16",
20
- "batch_size": 1,
21
- "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B",
22
- "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 2,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 16,
30
- "num_hidden_layers": 24,
31
- "num_key_value_heads": 16,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": null,
35
- "rope_theta": 10000.0,
36
- "tie_word_embeddings": false,
37
- "use_cache": true,
38
- "vocab_size": 32000
39
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/7ad601d64b726cbb4ba6.json DELETED
@@ -1,39 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "princeton-nlp/Sheared-LLaMA-1.3B",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "LlamaForCausalLM"
7
- ],
8
- "attention_bias": false,
9
- "attention_dropout": 0.0,
10
- "head_dim": 128,
11
- "hidden_act": "silu",
12
- "hidden_size": 2048,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 5504,
15
- "max_position_embeddings": 4096,
16
- "mlp_bias": false,
17
- "model_type": "llama",
18
- "neuron": {
19
- "auto_cast_type": "fp16",
20
- "batch_size": 4,
21
- "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B",
22
- "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4",
23
- "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.17.194.0+d312836f",
25
- "num_cores": 2,
26
- "sequence_length": 4096,
27
- "task": "text-generation"
28
- },
29
- "num_attention_heads": 16,
30
- "num_hidden_layers": 24,
31
- "num_key_value_heads": 16,
32
- "pretraining_tp": 1,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": null,
35
- "rope_theta": 10000.0,
36
- "tie_word_embeddings": false,
37
- "use_cache": true,
38
- "vocab_size": 32000
39
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/455574355aa6f6df8272.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "HuggingFaceH4/zephyr-7b-beta",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "MistralForCausalLM"
7
- ],
8
- "attention_dropout": 0.0,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 4096,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 14336,
14
- "max_position_embeddings": 32768,
15
- "model_type": "mistral",
16
- "neuron": {
17
- "auto_cast_type": "bf16",
18
- "batch_size": 16,
19
- "checkpoint_id": "HuggingFaceH4/zephyr-7b-beta",
20
- "checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c",
21
- "compiler_type": "neuronx-cc",
22
- "compiler_version": "2.17.194.0+d312836f",
23
- "num_cores": 8,
24
- "sequence_length": 4096,
25
- "task": "text-generation"
26
- },
27
- "num_attention_heads": 32,
28
- "num_hidden_layers": 32,
29
- "num_key_value_heads": 8,
30
- "rms_norm_eps": 1e-05,
31
- "rope_theta": 10000.0,
32
- "sliding_window": 4096,
33
- "tie_word_embeddings": false,
34
- "use_cache": true,
35
- "vocab_size": 32000
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/9fcdfdf6fc4e018f496f.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "HuggingFaceH4/zephyr-7b-beta",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "MistralForCausalLM"
7
- ],
8
- "attention_dropout": 0.0,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 4096,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 14336,
14
- "max_position_embeddings": 32768,
15
- "model_type": "mistral",
16
- "neuron": {
17
- "auto_cast_type": "bf16",
18
- "batch_size": 4,
19
- "checkpoint_id": "HuggingFaceH4/zephyr-7b-beta",
20
- "checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c",
21
- "compiler_type": "neuronx-cc",
22
- "compiler_version": "2.17.194.0+d312836f",
23
- "num_cores": 8,
24
- "sequence_length": 4096,
25
- "task": "text-generation"
26
- },
27
- "num_attention_heads": 32,
28
- "num_hidden_layers": 32,
29
- "num_key_value_heads": 8,
30
- "rms_norm_eps": 1e-05,
31
- "rope_theta": 10000.0,
32
- "sliding_window": 4096,
33
- "tie_word_embeddings": false,
34
- "use_cache": true,
35
- "vocab_size": 32000
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/a3def2b08381a583fdae.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "HuggingFaceH4/zephyr-7b-beta",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "MistralForCausalLM"
7
- ],
8
- "attention_dropout": 0.0,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 4096,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 14336,
14
- "max_position_embeddings": 32768,
15
- "model_type": "mistral",
16
- "neuron": {
17
- "auto_cast_type": "bf16",
18
- "batch_size": 4,
19
- "checkpoint_id": "HuggingFaceH4/zephyr-7b-beta",
20
- "checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c",
21
- "compiler_type": "neuronx-cc",
22
- "compiler_version": "2.17.194.0+d312836f",
23
- "num_cores": 2,
24
- "sequence_length": 4096,
25
- "task": "text-generation"
26
- },
27
- "num_attention_heads": 32,
28
- "num_hidden_layers": 32,
29
- "num_key_value_heads": 8,
30
- "rms_norm_eps": 1e-05,
31
- "rope_theta": 10000.0,
32
- "sliding_window": 4096,
33
- "tie_word_embeddings": false,
34
- "use_cache": true,
35
- "vocab_size": 32000
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c37761e9cb8e4f9c854e.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "HuggingFaceH4/zephyr-7b-beta",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "MistralForCausalLM"
7
- ],
8
- "attention_dropout": 0.0,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 4096,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 14336,
14
- "max_position_embeddings": 32768,
15
- "model_type": "mistral",
16
- "neuron": {
17
- "auto_cast_type": "bf16",
18
- "batch_size": 1,
19
- "checkpoint_id": "HuggingFaceH4/zephyr-7b-beta",
20
- "checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c",
21
- "compiler_type": "neuronx-cc",
22
- "compiler_version": "2.17.194.0+d312836f",
23
- "num_cores": 8,
24
- "sequence_length": 4096,
25
- "task": "text-generation"
26
- },
27
- "num_attention_heads": 32,
28
- "num_hidden_layers": 32,
29
- "num_key_value_heads": 8,
30
- "rms_norm_eps": 1e-05,
31
- "rope_theta": 10000.0,
32
- "sliding_window": 4096,
33
- "tie_word_embeddings": false,
34
- "use_cache": true,
35
- "vocab_size": 32000
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/c7b792c009f3a8f9bd58.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "HuggingFaceH4/zephyr-7b-beta",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "MistralForCausalLM"
7
- ],
8
- "attention_dropout": 0.0,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 4096,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 14336,
14
- "max_position_embeddings": 32768,
15
- "model_type": "mistral",
16
- "neuron": {
17
- "auto_cast_type": "bf16",
18
- "batch_size": 8,
19
- "checkpoint_id": "HuggingFaceH4/zephyr-7b-beta",
20
- "checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c",
21
- "compiler_type": "neuronx-cc",
22
- "compiler_version": "2.17.194.0+d312836f",
23
- "num_cores": 8,
24
- "sequence_length": 4096,
25
- "task": "text-generation"
26
- },
27
- "num_attention_heads": 32,
28
- "num_hidden_layers": 32,
29
- "num_key_value_heads": 8,
30
- "rms_norm_eps": 1e-05,
31
- "rope_theta": 10000.0,
32
- "sliding_window": 4096,
33
- "tie_word_embeddings": false,
34
- "use_cache": true,
35
- "vocab_size": 32000
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/HuggingFaceH4/zephyr-7b-beta/fb824464b88d20b99ad4.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "HuggingFaceH4/zephyr-7b-beta",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "MistralForCausalLM"
7
- ],
8
- "attention_dropout": 0.0,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 4096,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 14336,
14
- "max_position_embeddings": 32768,
15
- "model_type": "mistral",
16
- "neuron": {
17
- "auto_cast_type": "bf16",
18
- "batch_size": 1,
19
- "checkpoint_id": "HuggingFaceH4/zephyr-7b-beta",
20
- "checkpoint_revision": "892b3d7a7b1cf10c7a701c60881cd93df615734c",
21
- "compiler_type": "neuronx-cc",
22
- "compiler_version": "2.17.194.0+d312836f",
23
- "num_cores": 2,
24
- "sequence_length": 4096,
25
- "task": "text-generation"
26
- },
27
- "num_attention_heads": 32,
28
- "num_hidden_layers": 32,
29
- "num_key_value_heads": 8,
30
- "rms_norm_eps": 1e-05,
31
- "rope_theta": 10000.0,
32
- "sliding_window": 4096,
33
- "tie_word_embeddings": false,
34
- "use_cache": true,
35
- "vocab_size": 32000
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/13c11b9170e3b3cb8544.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "Intel/neural-chat-7b-v3-3",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "MistralForCausalLM"
7
- ],
8
- "attention_dropout": 0.0,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 4096,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 14336,
14
- "max_position_embeddings": 32768,
15
- "model_type": "mistral",
16
- "neuron": {
17
- "auto_cast_type": "fp16",
18
- "batch_size": 1,
19
- "checkpoint_id": "Intel/neural-chat-7b-v3-3",
20
- "checkpoint_revision": "7506dfc5fb325a8a8e0c4f9a6a001671833e5b8e",
21
- "compiler_type": "neuronx-cc",
22
- "compiler_version": "2.17.194.0+d312836f",
23
- "num_cores": 2,
24
- "sequence_length": 4096,
25
- "task": "text-generation"
26
- },
27
- "num_attention_heads": 32,
28
- "num_hidden_layers": 32,
29
- "num_key_value_heads": 8,
30
- "rms_norm_eps": 1e-05,
31
- "rope_theta": 10000.0,
32
- "sliding_window": 4096,
33
- "tie_word_embeddings": false,
34
- "use_cache": true,
35
- "vocab_size": 32000
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/Intel/neural-chat-7b-v3-3/c0fe68f25c283c1998b1.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "Intel/neural-chat-7b-v3-3",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "MistralForCausalLM"
7
- ],
8
- "attention_dropout": 0.0,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 4096,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 14336,
14
- "max_position_embeddings": 32768,
15
- "model_type": "mistral",
16
- "neuron": {
17
- "auto_cast_type": "bf16",
18
- "batch_size": 4,
19
- "checkpoint_id": "Intel/neural-chat-7b-v3-3",
20
- "checkpoint_revision": "7506dfc5fb325a8a8e0c4f9a6a001671833e5b8e",
21
- "compiler_type": "neuronx-cc",
22
- "compiler_version": "2.17.194.0+d312836f",
23
- "num_cores": 2,
24
- "sequence_length": 4096,
25
- "task": "text-generation"
26
- },
27
- "num_attention_heads": 32,
28
- "num_hidden_layers": 32,
29
- "num_key_value_heads": 8,
30
- "rms_norm_eps": 1e-05,
31
- "rope_theta": 10000.0,
32
- "sliding_window": 4096,
33
- "tie_word_embeddings": false,
34
- "use_cache": true,
35
- "vocab_size": 32000
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/be253ef73d692f0acdde.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "mistralai/Mistral-7B-Instruct-v0.1",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "MistralForCausalLM"
7
- ],
8
- "attention_dropout": 0.0,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 4096,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 14336,
14
- "max_position_embeddings": 32768,
15
- "model_type": "mistral",
16
- "neuron": {
17
- "auto_cast_type": "bf16",
18
- "batch_size": 1,
19
- "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.1",
20
- "checkpoint_revision": "2dcff66eac0c01dc50e4c41eea959968232187fe",
21
- "compiler_type": "neuronx-cc",
22
- "compiler_version": "2.17.194.0+d312836f",
23
- "num_cores": 2,
24
- "sequence_length": 4096,
25
- "task": "text-generation"
26
- },
27
- "num_attention_heads": 32,
28
- "num_hidden_layers": 32,
29
- "num_key_value_heads": 8,
30
- "rms_norm_eps": 1e-05,
31
- "rope_theta": 10000.0,
32
- "sliding_window": 4096,
33
- "tie_word_embeddings": false,
34
- "use_cache": true,
35
- "vocab_size": 32000
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ab22dc02d25b1a888451.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "mistralai/Mistral-7B-Instruct-v0.2",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "MistralForCausalLM"
7
- ],
8
- "attention_dropout": 0.0,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 4096,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 14336,
14
- "max_position_embeddings": 32768,
15
- "model_type": "mistral",
16
- "neuron": {
17
- "auto_cast_type": "bf16",
18
- "batch_size": 1,
19
- "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2",
20
- "checkpoint_revision": "3ad372fc79158a2148299e3318516c786aeded6c",
21
- "compiler_type": "neuronx-cc",
22
- "compiler_version": "2.17.194.0+d312836f",
23
- "num_cores": 2,
24
- "sequence_length": 4096,
25
- "task": "text-generation"
26
- },
27
- "num_attention_heads": 32,
28
- "num_hidden_layers": 32,
29
- "num_key_value_heads": 8,
30
- "rms_norm_eps": 1e-05,
31
- "rope_theta": 1000000.0,
32
- "sliding_window": null,
33
- "tie_word_embeddings": false,
34
- "use_cache": true,
35
- "vocab_size": 32000
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev2/inference/mistral/mistralai/Mistral-7B-Instruct-v0.3/06ed075b59d7dee23809.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "_entry_class": "SingleModelCacheEntry",
3
- "_model_id": "mistralai/Mistral-7B-Instruct-v0.3",
4
- "_task": "text-generation",
5
- "architectures": [
6
- "MistralForCausalLM"
7
- ],
8
- "attention_dropout": 0.0,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 4096,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 14336,
14
- "max_position_embeddings": 32768,
15
- "model_type": "mistral",
16
- "neuron": {
17
- "auto_cast_type": "bf16",
18
- "batch_size": 1,
19
- "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.3",
20
- "checkpoint_revision": "e0bc86c23ce5aae1db576c8cca6f06f1f73af2db",
21
- "compiler_type": "neuronx-cc",
22
- "compiler_version": "2.17.194.0+d312836f",
23
- "num_cores": 8,
24
- "sequence_length": 4096,
25
- "task": "text-generation"
26
- },
27
- "num_attention_heads": 32,
28
- "num_hidden_layers": 32,
29
- "num_key_value_heads": 8,
30
- "rms_norm_eps": 1e-05,
31
- "rope_theta": 1000000.0,
32
- "sliding_window": null,
33
- "tie_word_embeddings": false,
34
- "use_cache": true,
35
- "vocab_size": 32768
36
- }